Skip to content

Commit

Permalink
fix selfmon related known issues
Browse files Browse the repository at this point in the history
  • Loading branch information
DuodenumL authored and tonicmuroq committed Aug 17, 2021
1 parent 56b9367 commit 6d3c4ea
Show file tree
Hide file tree
Showing 9 changed files with 1,146 additions and 1,096 deletions.
4 changes: 3 additions & 1 deletion cluster/calcium/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,8 +67,10 @@ func (c *Calcium) SetNode(ctx context.Context, opts *types.SetNodeOptions) (*typ
litter.Dump(opts)
opts.Normalize(node)
n = node

n.Available = (opts.StatusOpt == types.TriTrue) || (opts.StatusOpt == types.TriKeep && n.Available)
if !n.Available {
n.Bypass = (opts.BypassOpt == types.TriTrue) || (opts.BypassOpt == types.TriKeep && n.Bypass)
if n.IsDown() {
logger.Errorf(ctx, "[SetNodeAvailable] node marked down: %s", opts.Nodename)
}
if opts.WorkloadsDown {
Expand Down
2,205 changes: 1,113 additions & 1,092 deletions rpc/gen/core.pb.go

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions rpc/gen/core.proto
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,7 @@ message Node {
map<string, int64> volume = 19;
int64 volume_used = 20;
map<string, int64> init_numa_memory = 21;
bool bypass = 22;
}

message Nodes {
Expand All @@ -178,6 +179,7 @@ message SetNodeOptions {
map<string, int64> delta_volume = 9;
bool workloads_down = 10;
string endpoint = 11;
TriOpt bypass_opt = 12;
}

message SetNodeStatusOptions {
Expand Down
14 changes: 13 additions & 1 deletion rpc/rpc.go
Original file line number Diff line number Diff line change
Expand Up @@ -180,9 +180,21 @@ func (v *Vibranium) ListPodNodes(ctx context.Context, opts *pb.ListNodesOptions)
ctx, cancel := context.WithTimeout(ctx, v.config.GlobalTimeout)
defer cancel()
nodes := []*pb.Node{}
nodeChan := make(chan *pb.Node, len(ns))
wg := &sync.WaitGroup{}
for _, n := range ns {
nodes = append(nodes, toRPCNode(ctx, n))
wg.Add(1)
go func(node *types.Node) {
defer wg.Done()
nodeChan <- toRPCNode(ctx, node)
}(n)
}
wg.Wait()
close(nodeChan)
for node := range nodeChan {
nodes = append(nodes, node)
}

return &pb.Nodes{Nodes: nodes}, nil
}

Expand Down
2 changes: 2 additions & 0 deletions rpc/transform.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ func toRPCNode(ctx context.Context, n *types.Node) *pb.Node {
Numa: n.NUMA,
NumaMemory: n.NUMAMemory,
InitNumaMemory: n.InitNUMAMemory,
Bypass: n.Bypass,
}
}

Expand Down Expand Up @@ -153,6 +154,7 @@ func toCoreSetNodeOptions(b *pb.SetNodeOptions) (*types.SetNodeOptions, error) {
DeltaVolume: b.DeltaVolume,
NUMA: b.Numa,
Labels: b.Labels,
BypassOpt: types.TriOptions(b.BypassOpt),
}
for cpuID, cpuShare := range b.DeltaCpu {
r.DeltaCPU[cpuID] = int64(cpuShare)
Expand Down
3 changes: 2 additions & 1 deletion store/etcdv3/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,7 @@ func (m *Mercury) doAddNode(ctx context.Context, name, endpoint, podname, ca, ce
NUMAMemory: numaMemory,
},
Available: true,
Bypass: false,
}

bytes, err := json.Marshal(node)
Expand Down Expand Up @@ -291,7 +292,7 @@ func (m *Mercury) doGetNodes(ctx context.Context, kvs []*mvccpb.KeyValue, labels
return nil, err
}
node.Init()
if (node.Available || all) && utils.FilterWorkload(node.Labels, labels) {
if (!node.IsDown() || all) && utils.FilterWorkload(node.Labels, labels) {
engine, err := m.makeClient(ctx, node, false)
if err != nil {
return nil, err
Expand Down
2 changes: 1 addition & 1 deletion store/redis/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -280,7 +280,7 @@ func (r *Rediaron) doGetNodes(ctx context.Context, kvs map[string]string, labels
return nil, err
}
node.Init()
if (node.Available || all) && utils.FilterWorkload(node.Labels, labels) {
if (!node.IsDown() || all) && utils.FilterWorkload(node.Labels, labels) {
engine, err := r.makeClient(ctx, node, false)
if err != nil {
return nil, err
Expand Down
9 changes: 9 additions & 0 deletions types/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,8 @@ type Node struct {
CPUUsed float64 `json:"cpuused"`
VolumeUsed int64 `json:"volumeused"`

// Bypass: if true, the node will not participate in future scheduling.
Bypass bool `json:"bypass,omitempty"`
Available bool `json:"available"`
Engine engine.API `json:"-"`
}
Expand Down Expand Up @@ -208,6 +210,13 @@ func (n *Node) PreserveResources(resource *ResourceMeta) {
}
}

// IsDown reports whether the node should be treated as down.
//
// A node counts as down when it is bypassed or when it is not available.
// The bypass flag takes effect regardless of Available, so a node whose
// Available flag is still true is nevertheless reported as down once
// Bypass is set.
func (n *Node) IsDown() bool {
	// Bypass alone is enough to treat the node as down.
	if n.Bypass {
		return true
	}
	// Otherwise the node is down exactly when it is not available.
	return !n.Available
}

// NodeMetrics used for metrics collecting
type NodeMetrics struct {
Name string
Expand Down
1 change: 1 addition & 0 deletions types/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,7 @@ type SetNodeOptions struct {
DeltaVolume VolumeMap
NUMA map[string]string
Labels map[string]string
BypassOpt TriOptions
}

// Validate checks options
Expand Down

0 comments on commit 6d3c4ea

Please sign in to comment.