-
Notifications
You must be signed in to change notification settings - Fork 39
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
batch node removal #314
batch node removal #314
Changes from 9 commits
2a698f6
9052b24
4b71030
a9b8541
62d247a
74e5b5f
1acff2b
b4fbadc
e0da85f
b181e32
91a7614
718b94a
cba3448
fb8c852
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -542,7 +542,7 @@ func (c *M3DBController) expandPlacementForSet( | |
// removes the last pod in the StatefulSet from the active placement, enabling | ||
// the StatefulSet size to be decreased once the remove completes. | ||
func (c *M3DBController) shrinkPlacementForSet( | ||
cluster *myspec.M3DBCluster, set *appsv1.StatefulSet, pl placement.Placement, | ||
cluster *myspec.M3DBCluster, set *appsv1.StatefulSet, pl placement.Placement, removeCount int, | ||
) error { | ||
if cluster.Spec.PreventScaleDown { | ||
return pkgerrors.Errorf("cannot remove nodes from %s/%s, preventScaleDown is true", | ||
|
@@ -556,25 +556,36 @@ func (c *M3DBController) shrinkPlacementForSet( | |
return err | ||
} | ||
|
||
_, removeInst, err := c.findPodInstanceToRemove(cluster, pl, pods) | ||
_, removeInst, err := c.findPodInstancesToRemove(cluster, pl, pods, removeCount) | ||
if err != nil { | ||
c.logger.Error("error finding pod to remove", zap.Error(err)) | ||
return err | ||
} | ||
|
||
c.logger.Info("removing pod from placement", zap.String("instance", removeInst.ID())) | ||
return c.adminClient.placementClientForCluster(cluster).Remove([]string{removeInst.ID()}) | ||
if len(removeInst) == 0 { | ||
c.logger.Info("nothing to remove, skipping remove call") | ||
return nil | ||
} | ||
|
||
removeIds := make([]string, len(removeInst)) | ||
for idx, inst := range removeInst { | ||
removeIds[idx] = inst.ID() | ||
} | ||
c.logger.Info("removing instances from placement", | ||
zap.String("instances", strings.Join(removeIds, ","))) | ||
return c.adminClient.placementClientForCluster(cluster).Remove(removeIds) | ||
} | ||
|
||
// findPodInstanceToRemove returns the pod (and associated placement instace) | ||
// findPodInstancesToRemove returns the pod (and associated placement instance) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nit: can you update the comment to reflect that this will return multiple pods / instances now? |
||
// with the highest ordinal number in the stateful set AND in the placement, so | ||
// that we remove from the placement the pod that will be deleted when the set | ||
// size is scaled down. | ||
func (c *M3DBController) findPodInstanceToRemove( | ||
func (c *M3DBController) findPodInstancesToRemove( | ||
cluster *myspec.M3DBCluster, | ||
pl placement.Placement, | ||
pods []*corev1.Pod, | ||
) (*corev1.Pod, placement.Instance, error) { | ||
removeCount int, | ||
) ([]*corev1.Pod, []placement.Instance, error) { | ||
if len(pods) == 0 { | ||
return nil, nil, errEmptyPodList | ||
} | ||
|
@@ -584,7 +595,12 @@ func (c *M3DBController) findPodInstanceToRemove( | |
return nil, nil, pkgerrors.WithMessage(err, "cannot sort pods") | ||
} | ||
|
||
for i := len(podIDs) - 1; i >= 0; i-- { | ||
var ( | ||
podsToRemove []*corev1.Pod | ||
instancesToRemove []placement.Instance | ||
leftToRemove = removeCount | ||
) | ||
for i := len(podIDs) - 1; i >= 0 && leftToRemove > 0; i-- { | ||
pod := podIDs[i].pod | ||
inst, err := c.findPodInPlacement(cluster, pl, pod) | ||
if pkgerrors.Cause(err) == errPodNotInPlacement { | ||
|
@@ -595,10 +611,11 @@ func (c *M3DBController) findPodInstanceToRemove( | |
if err != nil { | ||
return nil, nil, pkgerrors.WithMessage(err, "error finding pod in placement") | ||
} | ||
return pod, inst, nil | ||
leftToRemove-- | ||
podsToRemove = append(podsToRemove, pod) | ||
instancesToRemove = append(instancesToRemove, inst) | ||
} | ||
|
||
return nil, nil, errNoPodsInPlacement | ||
return podsToRemove, instancesToRemove, nil | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. To ensure this is doing what the user intends, do we want to return an error if There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think the only case where this can happen is if we somehow want to remove more than exists. There is a test case when removeCount is for 4 but we have only 3 instances in placement. In practice this should not happen because we remove only if placement contains more than desired:
. Maybe if we spam expand/shrink we could enter some weird state here, but I think it's hard to reason about what would happen anyway. Another, potentially safer, approach: instead of passing "removeCount" we could pass in "desired" so that we try to reach the target state; we could then assert that the target state makes sense, e.g. "desired" > "currentPodCount", otherwise it looks like an expansion. |
||
} | ||
|
||
// findPodInPlacement looks up a pod in the placement. Equality is based on | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
super nit:
s/pod/pods