-
Notifications
You must be signed in to change notification settings - Fork 39
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
batch node removal #314
batch node removal #314
Changes from all commits
2a698f6
9052b24
4b71030
a9b8541
62d247a
74e5b5f
1acff2b
b4fbadc
e0da85f
b181e32
91a7614
718b94a
cba3448
fb8c852
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -538,11 +538,12 @@ func (c *M3DBController) expandPlacementForSet( | |
return c.addPodsToPlacement(ctx, cluster, podsToAdd) | ||
} | ||
|
||
// shrinkPlacementForSet takes a StatefulSet that needs to be shrunk and | ||
// removes the last pod in the StatefulSet from the active placement, enabling | ||
// shrinkPlacementForSet takes a StatefulSet that needs to be shrunk and removes any pods | ||
// that are above desired instance count in the StatefulSet from the active placement, enabling | ||
// the StatefulSet size to be decreased once the remove completes. | ||
func (c *M3DBController) shrinkPlacementForSet( | ||
cluster *myspec.M3DBCluster, set *appsv1.StatefulSet, pl placement.Placement, | ||
cluster *myspec.M3DBCluster, set *appsv1.StatefulSet, | ||
pl placement.Placement, desiredInstanceCount int, | ||
) error { | ||
if cluster.Spec.PreventScaleDown { | ||
return pkgerrors.Errorf("cannot remove nodes from %s/%s, preventScaleDown is true", | ||
|
@@ -556,49 +557,68 @@ func (c *M3DBController) shrinkPlacementForSet( | |
return err | ||
} | ||
|
||
_, removeInst, err := c.findPodInstanceToRemove(cluster, pl, pods) | ||
_, removeInst, err := c.findPodsAndInstancesToRemove(cluster, pl, pods, desiredInstanceCount) | ||
if err != nil { | ||
c.logger.Error("error finding pod to remove", zap.Error(err)) | ||
c.logger.Error("error finding pods to remove", zap.Error(err)) | ||
return err | ||
} | ||
|
||
c.logger.Info("removing pod from placement", zap.String("instance", removeInst.ID())) | ||
return c.adminClient.placementClientForCluster(cluster).Remove([]string{removeInst.ID()}) | ||
if len(removeInst) == 0 { | ||
c.logger.Info("nothing to remove, skipping remove call") | ||
return nil | ||
} | ||
|
||
removeIds := make([]string, len(removeInst)) | ||
for idx, inst := range removeInst { | ||
removeIds[idx] = inst.ID() | ||
} | ||
c.logger.Info("removing instances from placement", | ||
zap.String("instances", strings.Join(removeIds, ","))) | ||
return c.adminClient.placementClientForCluster(cluster).Remove(removeIds) | ||
} | ||
|
||
// findPodInstanceToRemove returns the pod (and associated placement instace) | ||
// findPodsAndInstancesToRemove returns pods (and associated placement instances) | ||
// with the highest ordinal number in the stateful set AND in the placement, so | ||
// that we remove from the placement the pod that will be deleted when the set | ||
// that we remove from the placement pods that will be deleted when the set | ||
// size is scaled down. | ||
func (c *M3DBController) findPodInstanceToRemove( | ||
func (c *M3DBController) findPodsAndInstancesToRemove( | ||
cluster *myspec.M3DBCluster, | ||
pl placement.Placement, | ||
pods []*corev1.Pod, | ||
) (*corev1.Pod, placement.Instance, error) { | ||
desiredInstanceCount int, | ||
) ([]*corev1.Pod, []placement.Instance, error) { | ||
if len(pods) == 0 { | ||
return nil, nil, errEmptyPodList | ||
} | ||
if desiredInstanceCount < 0 { | ||
msg := fmt.Sprintf("desired instance count is negative: %d", desiredInstanceCount) | ||
return nil, nil, pkgerrors.New(msg) | ||
} | ||
|
||
podIDs, err := sortPods(pods) | ||
if err != nil { | ||
return nil, nil, pkgerrors.WithMessage(err, "cannot sort pods") | ||
} | ||
|
||
for i := len(podIDs) - 1; i >= 0; i-- { | ||
var ( | ||
podsToRemove []*corev1.Pod | ||
instancesToRemove []placement.Instance | ||
) | ||
for i := len(podIDs) - 1; i >= desiredInstanceCount; i-- { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. nit: Maybe it's worth moving the check that |
||
pod := podIDs[i].pod | ||
inst, err := c.findPodInPlacement(cluster, pl, pod) | ||
if pkgerrors.Cause(err) == errPodNotInPlacement { | ||
// If the instance is already out of the placement, continue to the next | ||
// If the pod is already out of the placement, continue to the next | ||
// one. | ||
continue | ||
} | ||
if err != nil { | ||
return nil, nil, pkgerrors.WithMessage(err, "error finding pod in placement") | ||
} | ||
return pod, inst, nil | ||
podsToRemove = append(podsToRemove, pod) | ||
instancesToRemove = append(instancesToRemove, inst) | ||
} | ||
|
||
return nil, nil, errNoPodsInPlacement | ||
return podsToRemove, instancesToRemove, nil | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. To ensure this is doing what the user intends, do we want to return an error if There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think the only case where this can happen is if we somehow want to remove more than exists. There is a test case where removeCount is 4 but we have only 3 instances in the placement. In practice this should not happen because we remove only if the placement contains more than desired:
. Maybe if we spam expand/shrink we could enter some weird state here. But I think it's hard to reason about what will happen anyway. Another approach that could potentially be safer is, instead of having "removeCount", to pass in "desired" so that we try to reach a target state; we could then assert that the target state makes sense, e.g. if "desired" > "currentPodCount" it looks like an expansion instead. |
||
} | ||
|
||
// findPodInPlacement looks up a pod in the placement. Equality is based on | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Shouldn't we also validate against
desiredInstanceCount = 0
?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think 0 should be a valid option, like we can do with
kubectl scale --replicas=0
. I haven't tried it, though, to see whether it will work. There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
How can it be valid in terms of placement - where will the shards be relocated to?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hmm yeah maybe it does not make sense after all.