Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added support for ephemeral volumes and ingress creation support #1312

Closed
Closed
Show file tree
Hide file tree
Changes from 16 commits
Commits
Show all changes
58 commits
Select commit Hold shift + click to select a range
8bc85a9
Added support for ephemeral volumes and ingress creation support
blublinsky Aug 10, 2023
949b927
Fixed formatting
blublinsky Aug 11, 2023
2170b96
Downgrade `kind` from to `v0.20.0` to `v0.11.1` (#1313)
architkulkarni Aug 10, 2023
44c0cec
Do not update pod labels if they haven't changed (#1304)
JoshKarpel Aug 11, 2023
c3a0285
[CI] Run sample job YAML tests in buildkite (#1315)
architkulkarni Aug 11, 2023
9bd1a50
Api server makefile (#1301)
z103cb Aug 13, 2023
c8045cc
Upgrade to Go 1.19 (#1325)
kevin85421 Aug 14, 2023
55341f8
Fix release actions (#1323)
anishasthana Aug 14, 2023
3b0e26a
Added support for ephemeral volumes and ingress creation support
blublinsky Aug 10, 2023
c667e89
Fixed formatting
blublinsky Aug 15, 2023
ff17794
Fixed formatting
blublinsky Aug 15, 2023
16d72cb
[Benchmark] KubeRay memory / scalability benchmark (#1324)
kevin85421 Aug 14, 2023
b1420ca
Added support for ephemeral volumes and ingress creation support
blublinsky Aug 10, 2023
ba8bb56
rebased to current
blublinsky Aug 15, 2023
262b3f4
restoring PB numbers
blublinsky Aug 17, 2023
c19fde0
Merge branch 'master' into api_server_volumes_ingress
blublinsky Aug 17, 2023
06ccd09
Bump the golangci-lint version in the api server makefile (#1342)
z103cb Aug 17, 2023
1cbac51
Documentation and example for running simple NLP service on kuberay (…
gvspraveen Aug 17, 2023
8be0a21
Removed use of the of BUILD_FLAGS in apiserver makefile (#1336)
z103cb Aug 18, 2023
e79e0b9
[GCS FT][Refactor] Redefine the behavior for deleting Pods and stop l…
kevin85421 Aug 21, 2023
d6e4a59
Added support for ephemeral volumes and ingress creation support
blublinsky Aug 10, 2023
c2a5efc
minor fixes
blublinsky Aug 22, 2023
81b4187
minor fixes
blublinsky Aug 22, 2023
405fddb
minor fixes
blublinsky Aug 22, 2023
5de4a42
[RayJob] Add runtime env YAML field (#1338)
architkulkarni Aug 22, 2023
3a7a17f
Delete ray_v1alpha1_rayjob.batch-inference.yaml (#1360)
architkulkarni Aug 23, 2023
de8bc26
[Feature] Allow RayCluster Helm chart to specify different images for…
Darren221 Aug 24, 2023
106490e
Bump the golangci-lint version in the api server makefile (#1342)
z103cb Aug 17, 2023
0d3d696
Documentation and example for running simple NLP service on kuberay (…
gvspraveen Aug 17, 2023
1e93a67
Removed use of the of BUILD_FLAGS in apiserver makefile (#1336)
z103cb Aug 18, 2023
40fe9d4
[GCS FT][Refactor] Redefine the behavior for deleting Pods and stop l…
kevin85421 Aug 21, 2023
ed370ba
[RayJob] Add runtime env YAML field (#1338)
architkulkarni Aug 22, 2023
6613d21
Delete ray_v1alpha1_rayjob.batch-inference.yaml (#1360)
architkulkarni Aug 23, 2023
4438d40
[Feature] Allow RayCluster Helm chart to specify different images for…
Darren221 Aug 24, 2023
140e595
Added support for ephemeral volumes and ingress creation support
blublinsky Aug 10, 2023
5d8c341
Fixed error handling
blublinsky Aug 25, 2023
b512c40
Fixed error handling
blublinsky Aug 25, 2023
1a2674d
Fixed error handling
blublinsky Aug 25, 2023
34b5135
Fixed error handling
blublinsky Aug 27, 2023
e208f06
Added support for ephemeral volumes and ingress creation support
blublinsky Aug 10, 2023
1348800
Fixed formatting
blublinsky Aug 11, 2023
c933c37
Added support for ephemeral volumes and ingress creation support
blublinsky Aug 10, 2023
c6dc652
Fixed formatting
blublinsky Aug 15, 2023
522ac3e
Fixed formatting
blublinsky Aug 15, 2023
e42042d
Added support for ephemeral volumes and ingress creation support
blublinsky Aug 10, 2023
6557b59
rebased to current
blublinsky Aug 15, 2023
06b2992
restoring PB numbers
blublinsky Aug 17, 2023
cd4690d
Added support for ephemeral volumes and ingress creation support
blublinsky Aug 10, 2023
c6195bd
minor fixes
blublinsky Aug 22, 2023
402f252
minor fixes
blublinsky Aug 22, 2023
e0910fc
minor fixes
blublinsky Aug 22, 2023
bda0809
Added support for ephemeral volumes and ingress creation support
blublinsky Aug 10, 2023
52bac98
Fixed error handling
blublinsky Aug 25, 2023
300844e
Fixed error handling
blublinsky Aug 25, 2023
8434ea9
Fixed error handling
blublinsky Aug 25, 2023
806d66d
Fixed error handling
blublinsky Aug 27, 2023
ca1d7a3
Fixed error handling
blublinsky Aug 28, 2023
e0aecb2
Merge remote-tracking branch 'origin/api_server_volumes_ingress' into…
blublinsky Aug 28, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 71 additions & 0 deletions .buildkite/test-rayjob-sample-yamls.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
- label: Test RayJob Sample YAMLs
instance_size: large
image: golang:1.19
commands:
# Install Go
- export PATH=$PATH:/usr/local/go/bin

# Install kind
- curl -Lo ./kind https://kind.sigs.k8s.io/dl/v0.11.1/kind-linux-amd64
- chmod +x ./kind
- mv ./kind /usr/local/bin/kind

# Install Docker
- bash scripts/install-docker.sh

# Delete dangling clusters
- kind delete clusters --all

# Install kubectl.
- curl -LO https://dl.k8s.io/release/v1.27.3/bin/linux/amd64/kubectl
- curl -LO "https://dl.k8s.io/release/v1.27.3/bin/linux/amd64/kubectl.sha256"
- echo "$(cat kubectl.sha256) kubectl" | sha256sum --check
- install -o root -g root -m 0755 kubectl /usr/local/bin/kubectl

# Create the cluster
- time kind create cluster --wait 120s --config tests/framework/config/kind-config-buildkite.yml
- docker ps

# Now that the kind node is running, it exposes port 6443 in the dind-daemon network.
- kubectl config set clusters.kind-kind.server https://docker:6443

# Verify that kubectl works
- kubectl version
- kubectl cluster-info
- kubectl get nodes
- kubectl get pods --all-namespaces

# Install Helm
- curl -Lo helm.tar.gz https://get.helm.sh/helm-v3.12.2-linux-amd64.tar.gz
- tar -zxvf helm.tar.gz
- mv linux-amd64/helm /usr/local/bin/helm
- helm repo add kuberay https://ray-project.github.io/kuberay-helm/
- helm repo update

# Install KubeRay operator
- pushd ray-operator
- IMG=kuberay/operator:nightly make docker-image
- kind load docker-image kuberay/operator:nightly
- popd

- pushd helm-chart/kuberay-operator
# Use helm --wait instead of kubectl wait to prevent flakiness. See #618
- helm install kuberay-operator --set image.repository=kuberay/operator --set image.tag=nightly --wait --timeout=5m0s .
- popd

- echo "Kuberay operator successfully installed."

# Delete kind clusters
- kind delete clusters --all

# Install Python 3.11 and set up a virtual environment
- apt-get update
- apt-get install -y python3.11 python3.11-venv
- python3 -m venv .venv
- source .venv/bin/activate

# Install requirements
- pip install -r tests/framework/config/requirements.txt

# Run test
- BUILDKITE_ENV=true python3 tests/test_sample_rayjob_yamls.py
4 changes: 4 additions & 0 deletions apiserver/DEVELOPMENT.md
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,9 @@ make install
#To use the helm charts
make undeploy


#To use the configuration

make uninstall
```

Expand All @@ -122,8 +124,10 @@ When developing and testing with kind you might want to execute these targets to

```bash
#To create a new API server image and to deploy it on a new cluster

make docker-image cluster load-image deploy


#To create a new API server image, operator image and deploy them on a new cluster
make docker-image operator-image cluster load-image load-operator-image deploy deploy-operator
```
Expand Down
10 changes: 10 additions & 0 deletions apiserver/pkg/model/converter.go
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,10 @@ func FromCrdToApiCluster(cluster *v1alpha1.RayCluster, events []v1.Event) *api.C
ClusterState: string(cluster.Status.State),
}

if len(cluster.ObjectMeta.Annotations) > 0 {
pbCluster.Annotations = cluster.ObjectMeta.Annotations
}

// loop container and find the resource
pbCluster.ClusterSpec = PopulateRayClusterSpec(cluster.Spec)

Expand Down Expand Up @@ -146,6 +150,7 @@ func PopulateHeadNodeSpec(spec v1alpha1.HeadGroupSpec) *api.HeadGroupSpec {
ServiceType: string(spec.ServiceType),
Image: spec.Template.Annotations[util.RayClusterImageAnnotationKey],
ComputeTemplate: spec.Template.Annotations[util.RayClusterComputeTemplateAnnotationKey],
Volumes: PopulateVolumes(&spec.Template),
}

for _, annotation := range getNodeDefaultAnnotations() {
Expand All @@ -162,6 +167,10 @@ func PopulateHeadNodeSpec(spec v1alpha1.HeadGroupSpec) *api.HeadGroupSpec {
headNodeSpec.Labels = spec.Template.Labels
}

if *spec.EnableIngress {
Copy link
Contributor

@tedhtchang tedhtchang Aug 24, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If enableIngress is missing, server crashes with panic: runtime error: invalid memory address or nil pointer dereference error.

Suggested change
if *spec.EnableIngress {
if spec.EnableIngress != nil && *spec.EnableIngress {

Copy link
Contributor

@tedhtchang tedhtchang Aug 24, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also I think it's better to implement 1 feature per PR. It would be easier to review and merge. For example, here I am testing storage but encountered enabledIngress error.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I know, sorry, was in a rush

headNodeSpec.EnableIngress = true
}

// Here we update environment only for a container named 'ray-head'
if container, _, ok := util.GetContainerByName(spec.Template.Spec.Containers, "ray-head"); ok && len(container.Env) > 0 {
env := make(map[string]string)
Expand Down Expand Up @@ -196,6 +205,7 @@ func PopulateWorkerNodeSpec(specs []v1alpha1.WorkerGroupSpec) []*api.WorkerGroup
GroupName: spec.GroupName,
Image: spec.Template.Annotations[util.RayClusterImageAnnotationKey],
ComputeTemplate: spec.Template.Annotations[util.RayClusterComputeTemplateAnnotationKey],
Volumes: PopulateVolumes(&spec.Template),
}

for _, annotation := range getNodeDefaultAnnotations() {
Expand Down
28 changes: 27 additions & 1 deletion apiserver/pkg/model/converter_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ import (
)

var (
enableIngress = false
enableIngress = true
headNodeReplicas int32 = 1
workerReplicas int32 = 5
)
Expand Down Expand Up @@ -195,6 +195,22 @@ var workerSpecTest = v1alpha1.WorkerGroupSpec{
},
}

// ClusterSpecTest is a RayCluster fixture named "raycluster-sample" in the
// "default" namespace. It carries a single ingress-class annotation on its
// metadata and is built from the headSpecTest and workerSpecTest group specs.
var ClusterSpecTest = v1alpha1.RayCluster{
ObjectMeta: metav1.ObjectMeta{
Name: "raycluster-sample",
Namespace: "default",
// Exactly one annotation, so conversion tests can check it is carried over.
Annotations: map[string]string{
"kubernetes.io/ingress.class": "nginx",
},
},
Spec: v1alpha1.RayClusterSpec{
HeadGroupSpec: headSpecTest,
WorkerGroupSpecs: []v1alpha1.WorkerGroupSpec{
workerSpecTest,
},
},
}

var expectedAnnotations = map[string]string{
"custom": "value",
}
Expand All @@ -220,6 +236,9 @@ func TestPopulateHeadNodeSpec(t *testing.T) {
if groupSpec.ServiceAccount != "account" {
t.Errorf("failed to convert service account")
}
if groupSpec.EnableIngress != *headSpecTest.EnableIngress {
t.Errorf("failed to convert enableIngress")
}
if groupSpec.ImagePullSecret != "foo" {
t.Errorf("failed to convert image pull secret")
}
Expand Down Expand Up @@ -254,6 +273,13 @@ func TestPopulateWorkerNodeSpec(t *testing.T) {
}
}

// TestPopulateRayClusterSpec verifies that FromCrdToApiCluster carries the
// RayCluster's metadata annotations over to the converted API cluster, checking
// both the number of annotations and each key/value pair.
func TestPopulateRayClusterSpec(t *testing.T) {
	cluster := FromCrdToApiCluster(&ClusterSpecTest, []v1.Event{})
	want := ClusterSpecTest.ObjectMeta.Annotations
	if len(cluster.Annotations) != len(want) {
		t.Fatalf("failed to convert cluster's annotations: got %d annotations, want %d",
			len(cluster.Annotations), len(want))
	}
	// A matching count alone would not catch a wrong key or value.
	for key, wantValue := range want {
		gotValue, ok := cluster.Annotations[key]
		if !ok {
			t.Errorf("failed to convert cluster's annotations: key %q is missing", key)
			continue
		}
		if gotValue != wantValue {
			t.Errorf("failed to convert cluster's annotations: %q = %q, want %q", key, gotValue, wantValue)
		}
	}
}

func TestPopulateTemplate(t *testing.T) {
template := FromKubeToAPIComputeTemplate(&configMapWithoutTolerations)
if len(template.Tolerations) != 0 {
Expand Down
103 changes: 103 additions & 0 deletions apiserver/pkg/model/volumes.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
package model

import (
api "github.com/ray-project/kuberay/proto/go_client"
v1 "k8s.io/api/core/v1"
)

// PopulateVolumes converts the volumes declared in a pod template into their
// API representation.
//
// Only host path, persistent volume claim and ephemeral volumes are converted;
// volumes backed by any other source are skipped. It returns nil when the
// template is nil or declares no volumes.
//
// A volume that no container mounts is still reported, with an empty mount path
// and api.Volume_NONE propagation, rather than dereferencing a nil mount. An
// ephemeral volume without a volume claim template is skipped.
func PopulateVolumes(podTemplate *v1.PodTemplateSpec) []*api.Volume {
	if podTemplate == nil || len(podTemplate.Spec.Volumes) == 0 {
		return nil
	}
	var volumes []*api.Volume
	for i := range podTemplate.Spec.Volumes {
		vol := &podTemplate.Spec.Volumes[i]

		// GetVolumeMount returns nil when no container mounts this volume, so
		// the mount-derived fields fall back to their zero values in that case.
		var mountPath string
		propagation := api.Volume_NONE
		if mount := GetVolumeMount(podTemplate, vol.Name); mount != nil {
			mountPath = mount.MountPath
			propagation = GetVolumeMountPropagation(mount)
		}

		switch {
		case vol.VolumeSource.HostPath != nil:
			// Host path. The host path type is optional on the Kubernetes
			// object; default to a directory when it is not set.
			hostPathType := api.Volume_DIRECTORY
			if vol.VolumeSource.HostPath.Type != nil {
				hostPathType = GetVolumeHostPathType(vol)
			}
			volumes = append(volumes, &api.Volume{
				Name:                 vol.Name,
				MountPath:            mountPath,
				Source:               vol.VolumeSource.HostPath.Path,
				MountPropagationMode: propagation,
				// NOTE(review): HOSTTOCONTAINER is a mount propagation constant
				// converted to a VolumeType; this presumably relies on it sharing
				// a numeric value with the host path volume type. Confirm against
				// the proto definition and use the dedicated VolumeType constant.
				VolumeType:   api.Volume_VolumeType(api.Volume_HOSTTOCONTAINER),
				HostPathType: hostPathType,
			})

		case vol.VolumeSource.PersistentVolumeClaim != nil:
			// Persistent volume claim.
			volumes = append(volumes, &api.Volume{
				Name:                 vol.Name,
				MountPath:            mountPath,
				MountPropagationMode: propagation,
				VolumeType:           api.Volume_PERSISTENT_VOLUME_CLAIM,
				ReadOnly:             vol.VolumeSource.PersistentVolumeClaim.ReadOnly,
			})

		case vol.VolumeSource.Ephemeral != nil:
			// Ephemeral volume. Without a claim template there is no storage
			// request, access mode or storage class to report.
			claim := vol.VolumeSource.Ephemeral.VolumeClaimTemplate
			if claim == nil {
				continue
			}
			request := claim.Spec.Resources.Requests[v1.ResourceStorage]
			volume := &api.Volume{
				Name:                 vol.Name,
				MountPath:            mountPath,
				MountPropagationMode: propagation,
				VolumeType:           api.Volume_EPHEMERAL,
				AccessMode:           GetAccessMode(vol),
				Storage:              request.String(),
			}
			if claim.Spec.StorageClassName != nil {
				volume.StorageClassName = *claim.Spec.StorageClassName
			}
			volumes = append(volumes, volume)
		}
	}
	return volumes
}

// GetVolumeMount returns the first volume mount named vol found in the
// template's containers, searching containers and their mounts in declaration
// order. It returns nil when the template is nil or no container mounts the
// volume.
//
// The returned pointer refers to a copy of the mount, so changes made through
// it do not affect the pod template.
func GetVolumeMount(podTemplate *v1.PodTemplateSpec, vol string) *v1.VolumeMount {
	if podTemplate == nil {
		return nil
	}
	containers := podTemplate.Spec.Containers
	for i := range containers {
		mounts := containers[i].VolumeMounts
		for j := range mounts {
			if mounts[j].Name != vol {
				continue
			}
			// Copy explicitly instead of taking the address of a range variable.
			found := mounts[j]
			return &found
		}
	}
	return nil
}

// GetVolumeMountPropagation maps a volume mount's propagation setting to the
// API enum. It returns api.Volume_NONE when mount is nil, when no propagation
// is set, or when the propagation is neither host-to-container nor
// bidirectional.
func GetVolumeMountPropagation(mount *v1.VolumeMount) api.Volume_MountPropagationMode {
	// GetVolumeMount yields nil for volumes no container mounts, so a nil mount
	// must be tolerated here.
	if mount == nil || mount.MountPropagation == nil {
		return api.Volume_NONE
	}
	switch *mount.MountPropagation {
	case v1.MountPropagationHostToContainer:
		return api.Volume_HOSTTOCONTAINER
	case v1.MountPropagationBidirectional:
		return api.Volume_BIDIRECTIONAL
	default:
		return api.Volume_NONE
	}
}

// GetVolumeHostPathType maps a host path volume's type to the API enum. It
// returns api.Volume_FILE for v1.HostPathFile and api.Volume_DIRECTORY for
// every other type, including when vol is nil, is not a host path volume, or
// has no host path type set.
func GetVolumeHostPathType(vol *v1.Volume) api.Volume_HostPathType {
	// HostPath and its Type are both pointers; dereferencing an unset one
	// would panic.
	if vol == nil || vol.VolumeSource.HostPath == nil || vol.VolumeSource.HostPath.Type == nil {
		return api.Volume_DIRECTORY
	}
	if *vol.VolumeSource.HostPath.Type == v1.HostPathFile {
		return api.Volume_FILE
	}
	return api.Volume_DIRECTORY
}

// GetAccessMode maps the first access mode of an ephemeral volume's claim
// template to the API enum. Only the first listed mode is considered. It
// returns api.Volume_RWO when vol is nil, is not an ephemeral volume, has no
// claim template, lists no access modes, or lists a first mode other than
// read-only-many or read-write-many.
func GetAccessMode(vol *v1.Volume) api.Volume_AccessMode {
	// Ephemeral and VolumeClaimTemplate are pointers; fall back to the default
	// mode rather than dereferencing an unset one.
	if vol == nil || vol.VolumeSource.Ephemeral == nil || vol.VolumeSource.Ephemeral.VolumeClaimTemplate == nil {
		return api.Volume_RWO
	}
	modes := vol.VolumeSource.Ephemeral.VolumeClaimTemplate.Spec.AccessModes
	if len(modes) == 0 {
		return api.Volume_RWO
	}
	switch modes[0] {
	case v1.ReadOnlyMany:
		return api.Volume_ROX
	case v1.ReadWriteMany:
		return api.Volume_RWX
	default:
		return api.Volume_RWO
	}
}
Loading