Skip to content

Commit

Permalink
fix: worker node can't connect to head node service (#445)
Browse files Browse the repository at this point in the history
Signed-off-by: Kevin Su <[email protected]>
  • Loading branch information
pingsutw authored Aug 9, 2022
1 parent 76f96bf commit 939034c
Show file tree
Hide file tree
Showing 3 changed files with 6 additions and 4 deletions.
2 changes: 1 addition & 1 deletion ray-operator/controllers/ray/common/ingress.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ func BuildIngressForHeadService(cluster rayiov1alpha1.RayCluster) (*networkingv1
PathType: &pathType,
Backend: networkingv1.IngressBackend{
Service: &networkingv1.IngressServiceBackend{
Name: utils.GenerateServiceName(cluster.Name),
Name: utils.CheckName(utils.GenerateServiceName(cluster.Name)),
Port: networkingv1.ServiceBackendPort{
Number: dashboardPort,
},
Expand Down
2 changes: 2 additions & 0 deletions ray-operator/controllers/ray/raycluster_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -685,6 +685,7 @@ func (r *RayClusterReconciler) buildHeadPod(instance rayiov1alpha1.RayCluster) c
podName := strings.ToLower(instance.Name + common.DashSymbol + string(rayiov1alpha1.HeadNode) + common.DashSymbol)
podName = utils.CheckName(podName) // making sure the name is valid
svcName := utils.GenerateServiceName(instance.Name)
svcName = utils.CheckName(svcName)
// The Ray head port used by workers to connect to the cluster (GCS server port for Ray >= 1.11.0, Redis port for older Ray.)
headPort := common.GetHeadPort(instance.Spec.HeadGroupSpec.RayStartParams)
autoscalingEnabled := instance.Spec.EnableInTreeAutoscaling
Expand Down Expand Up @@ -717,6 +718,7 @@ func (r *RayClusterReconciler) buildWorkerPod(instance rayiov1alpha1.RayCluster,
podName := strings.ToLower(instance.Name + common.DashSymbol + string(rayiov1alpha1.WorkerNode) + common.DashSymbol + worker.GroupName + common.DashSymbol)
podName = utils.CheckName(podName) // making sure the name is valid
svcName := utils.GenerateServiceName(instance.Name)
svcName = utils.CheckName(svcName)
// The Ray head port used by workers to connect to the cluster (GCS server port for Ray >= 1.11.0, Redis port for older Ray.)
headPort := common.GetHeadPort(instance.Spec.HeadGroupSpec.RayStartParams)
autoscalingEnabled := instance.Spec.EnableInTreeAutoscaling
Expand Down
6 changes: 3 additions & 3 deletions ray-operator/controllers/ray/utils/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,11 +47,11 @@ func IsRunningAndReady(pod *corev1.Pod) bool {

// CheckName makes sure the name does not start with a numeric value and that its total length stays under 63 characters.
func CheckName(s string) string {
maxLenght := 50 // 63 - (max(8,6) + 5 ) // 6 to 8 char are consumed at the end with "-head-" or -worker- + 5 generated.
maxLength := 50 // 63 - (max(8,6) + 5 ) // 6 to 8 char are consumed at the end with "-head-" or -worker- + 5 generated.

if len(s) > maxLenght {
if len(s) > maxLength {
// shorten the name
offset := int(math.Abs(float64(maxLenght) - float64(len(s))))
offset := int(math.Abs(float64(maxLength) - float64(len(s))))
fmt.Printf("pod name is too long: len = %v, we will shorten it by offset = %v\n", len(s), offset)
s = s[offset:]
}
Expand Down

0 comments on commit 939034c

Please sign in to comment.