Skip to content

Commit

Permalink
feat: export prometheus metrics
Browse files Browse the repository at this point in the history
Signed-off-by: ChrisLiu <[email protected]>
  • Loading branch information
chrisliu1995 committed Feb 14, 2023
1 parent f585041 commit 12a4395
Show file tree
Hide file tree
Showing 6 changed files with 347 additions and 6 deletions.
2 changes: 1 addition & 1 deletion config/default/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ bases:
# [CERTMANAGER] To enable cert-manager, uncomment all sections with 'CERTMANAGER'. 'WEBHOOK' components are required.
#- ../certmanager
# [PROMETHEUS] To enable prometheus monitor, uncomment all sections with 'PROMETHEUS'.
# - ../prometheus
- ../prometheus

patchesStrategicMerge:
# Protect the /metrics endpoint by putting it behind auth.
Expand Down
3 changes: 3 additions & 0 deletions config/manager/manager.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,9 @@ spec:
- --provider-config=/etc/kruise-game/config.toml
image: controller:latest
name: manager
ports:
- name: https
containerPort: 8080
securityContext:
allowPrivilegeEscalation: false
capabilities:
Expand Down
4 changes: 0 additions & 4 deletions config/prometheus/monitor.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,6 @@ spec:
endpoints:
- path: /metrics
port: https
scheme: https
bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
tlsConfig:
insecureSkipVerify: true
selector:
matchLabels:
control-plane: controller-manager
20 changes: 19 additions & 1 deletion main.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,10 @@ import (
kruiseV1beta1 "github.com/openkruise/kruise-api/apps/v1beta1"
"github.com/openkruise/kruise-game/cloudprovider"
cpmanager "github.com/openkruise/kruise-game/cloudprovider/manager"
kruisegameclientset "github.com/openkruise/kruise-game/pkg/client/clientset/versioned"
kruisegamevisions "github.com/openkruise/kruise-game/pkg/client/informers/externalversions"
controller "github.com/openkruise/kruise-game/pkg/controllers"
"github.com/openkruise/kruise-game/pkg/metrics"
"github.com/openkruise/kruise-game/pkg/webhook"
"os"
"time"
Expand Down Expand Up @@ -97,7 +100,8 @@ func main() {
}
}

mgr, err := ctrl.NewManager(ctrl.GetConfigOrDie(), ctrl.Options{
restConfig := ctrl.GetConfigOrDie()
mgr, err := ctrl.NewManager(restConfig, ctrl.Options{
Scheme: scheme,
MetricsBindAddress: metricsAddr,
Port: 9443,
Expand Down Expand Up @@ -164,6 +168,20 @@ func main() {
}
}()

kruisegameInformerFactory := kruisegamevisions.NewSharedInformerFactory(kruisegameclientset.NewForConfigOrDie(restConfig), 30*time.Second)
metricsController, err := metrics.NewController(kruisegameInformerFactory)
if err != nil {
setupLog.Error(err, "unable to create metrics controller")
os.Exit(1)
}
kruisegameInformerFactory.Start(signal.Done())
go func() {
if metricsController.Run(signal) != nil {
setupLog.Error(err, "unable to setup metrics controller")
os.Exit(1)
}
}()

setupLog.Info("starting kruise-game-manager")

if err := mgr.Start(signal); err != nil {
Expand Down
232 changes: 232 additions & 0 deletions pkg/metrics/controller.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,232 @@
/*
Copyright 2023 The Kruise Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package metrics

import (
"context"
"errors"
"fmt"
gamekruisev1alpha1 "github.com/openkruise/kruise-game/apis/v1alpha1"
kruisegamevisions "github.com/openkruise/kruise-game/pkg/client/informers/externalversions"
kruisegamelister "github.com/openkruise/kruise-game/pkg/client/listers/apis/v1alpha1"
"k8s.io/client-go/tools/cache"
"k8s.io/klog/v2"
"sync"
"time"
)

// Controller mirrors GameServer and GameServerSet objects, observed through
// shared informers, into the Prometheus collectors declared in this package.
type Controller struct {
	// Listers obtained from the GameServer and GameServerSet informers.
	gameServerLister    kruisegamelister.GameServerLister
	gameServerSetLister kruisegamelister.GameServerSetLister

	// HasSynced probes of the two informers; Run waits on both.
	gameServerSynced, gameServerSetSynced cache.InformerSynced

	// stateLock guards gameServerStateLastChange and opsStateLock guards
	// gameServerOpsStateLastChange.
	stateLock, opsStateLock sync.Mutex

	// Both maps are keyed by "namespace/name". The value is the GameServer's
	// age in seconds (time since its creation timestamp) at the moment the
	// last state / opsState change was recorded.
	gameServerStateLastChange, gameServerOpsStateLastChange map[string]float64
}

// NewController builds a metrics Controller on top of the given shared
// informer factory. It obtains the GameServer and GameServerSet informers,
// stores their listers and HasSynced functions on the controller, and
// registers the event handlers that keep the Prometheus collectors up to date.
//
// The factory is not started here; the caller is expected to start it. The
// returned error is always nil in the current implementation.
func NewController(kruisegameInformerFactory kruisegamevisions.SharedInformerFactory) (*Controller, error) {
	gsAccess := kruisegameInformerFactory.Game().V1alpha1().GameServers()
	gsInformer := gsAccess.Informer()

	gssAccess := kruisegameInformerFactory.Game().V1alpha1().GameServerSets()
	gssInformer := gssAccess.Informer()

	// The two mutexes are left at their zero value, which is an unlocked mutex.
	ctl := new(Controller)
	ctl.gameServerLister = gsAccess.Lister()
	ctl.gameServerSetLister = gssAccess.Lister()
	ctl.gameServerSynced = gsInformer.HasSynced
	ctl.gameServerSetSynced = gssInformer.HasSynced
	ctl.gameServerStateLastChange = map[string]float64{}
	ctl.gameServerOpsStateLastChange = map[string]float64{}

	// GameServers: track additions, updates and deletions.
	gsHandlers := cache.ResourceEventHandlerFuncs{
		AddFunc:    ctl.recordGsWhenAdd,
		UpdateFunc: ctl.recordGsWhenUpdate,
		DeleteFunc: ctl.recordGsWhenDelete,
	}
	gsInformer.AddEventHandler(gsHandlers)

	// GameServerSets: only updates and deletions are handled; on update just
	// the new object is inspected.
	gssHandlers := cache.ResourceEventHandlerFuncs{
		UpdateFunc: func(_, current interface{}) {
			ctl.recordGssWhenChange(current)
		},
		DeleteFunc: ctl.recordGssWhenDelete,
	}
	gssInformer.AddEventHandler(gssHandlers)

	return ctl, nil
}

// recordGsWhenAdd is the informer AddFunc for GameServers. It seeds the
// state / opsState change bookkeeping, bumps the total counter and the
// per-state and per-opsState gauges, and publishes the server's deletion and
// update priorities (0 when a priority is unset). Objects that are not
// *GameServer are ignored.
func (c *Controller) recordGsWhenAdd(obj interface{}) {
	gs, isGameServer := obj.(*gamekruisev1alpha1.GameServer)
	if !isGameServer {
		return
	}

	// Only the bookkeeping side effect matters here; "Add" always yields 0.
	c.calcDurationState(gs, "Add")
	c.calcDurationOpsState(gs, "Add")

	GameServersTotal.WithLabelValues().Inc()
	GameServersStateCount.WithLabelValues(string(gs.Status.CurrentState)).Inc()
	GameServersOpsStateCount.WithLabelValues(string(gs.Spec.OpsState)).Inc()

	var deletionPriority, updatePriority int
	if p := gs.Status.DeletionPriority; p != nil {
		deletionPriority = p.IntValue()
	}
	if p := gs.Status.UpdatePriority; p != nil {
		updatePriority = p.IntValue()
	}

	name, namespace := gs.Name, gs.Namespace
	GameServerDeletionPriority.WithLabelValues(name, namespace).Set(float64(deletionPriority))
	GameServerUpdatePriority.WithLabelValues(name, namespace).Set(float64(updatePriority))
}

// recordGsWhenUpdate is the informer UpdateFunc for GameServers.
//
// When the current state or the opsState differs between oldObj and newObj it
// moves one unit from the old label to the new label on the matching gauge and
// observes, on the matching histogram, how long the server stayed in the old
// value. It then republishes the deletion and update priorities of the new
// object (0 when a priority is unset). Arguments that are not *GameServer are
// ignored.
func (c *Controller) recordGsWhenUpdate(oldObj, newObj interface{}) {
	oldGs, ok := oldObj.(*gamekruisev1alpha1.GameServer)
	if !ok {
		return
	}

	newGs, ok := newObj.(*gamekruisev1alpha1.GameServer)
	if !ok {
		return
	}

	oldState := string(oldGs.Status.CurrentState)
	oldOpsState := string(oldGs.Spec.OpsState)
	newState := string(newGs.Status.CurrentState)
	newOpsState := string(newGs.Spec.OpsState)
	if oldState != newState {
		GameServersStateCount.WithLabelValues(newState).Inc()
		GameServersStateCount.WithLabelValues(oldState).Dec()
		GameServersStateDuration.WithLabelValues(newGs.Name, newGs.Namespace, oldState).Observe(c.calcDurationState(newGs, "Update"))
	}
	if oldOpsState != newOpsState {
		GameServersOpsStateCount.WithLabelValues(newOpsState).Inc()
		GameServersOpsStateCount.WithLabelValues(oldOpsState).Dec()
		GameServersOpsStateDuration.WithLabelValues(newGs.Name, newGs.Namespace, oldOpsState).Observe(c.calcDurationOpsState(newGs, "Update"))
	}

	// Read the priorities from the new object whenever they are set, the same
	// way recordGsWhenAdd does. The priority fields are pointers, so comparing
	// old and new with != compares addresses rather than values: that
	// dereferenced a nil pointer when a priority was cleared, and reset the
	// gauges to 0 whenever both arguments carried the same pointer.
	newDp := 0
	newUp := 0
	if p := newGs.Status.DeletionPriority; p != nil {
		newDp = p.IntValue()
	}
	if p := newGs.Status.UpdatePriority; p != nil {
		newUp = p.IntValue()
	}
	GameServerDeletionPriority.WithLabelValues(newGs.Name, newGs.Namespace).Set(float64(newDp))
	GameServerUpdatePriority.WithLabelValues(newGs.Name, newGs.Namespace).Set(float64(newUp))
}

// recordGsWhenDelete is the informer DeleteFunc for GameServers.
//
// It observes the time spent in the final state and opsState, decrements the
// per-state and per-opsState gauges, and removes the server's priority gauge
// series. The delete notification may carry either the *GameServer itself or
// a cache.DeletedFinalStateUnknown tombstone wrapping it; both are handled.
// Anything else is ignored.
func (c *Controller) recordGsWhenDelete(obj interface{}) {
	gs, ok := obj.(*gamekruisev1alpha1.GameServer)
	if !ok {
		// The informer hands over a tombstone when it missed the delete event
		// itself; unwrap it so the gauges and bookkeeping are still cleaned up.
		tombstone, isTombstone := obj.(cache.DeletedFinalStateUnknown)
		if !isTombstone {
			return
		}
		gs, ok = tombstone.Obj.(*gamekruisev1alpha1.GameServer)
		if !ok {
			return
		}
	}

	state := string(gs.Status.CurrentState)
	opsState := string(gs.Spec.OpsState)

	// The "Delete" action also drops the server from the duration bookkeeping.
	GameServersStateDuration.WithLabelValues(gs.Name, gs.Namespace, state).Observe(c.calcDurationState(gs, "Delete"))
	GameServersOpsStateDuration.WithLabelValues(gs.Name, gs.Namespace, opsState).Observe(c.calcDurationOpsState(gs, "Delete"))

	GameServersStateCount.WithLabelValues(state).Dec()
	GameServersOpsStateCount.WithLabelValues(opsState).Dec()
	GameServerDeletionPriority.DeleteLabelValues(gs.Name, gs.Namespace)
	GameServerUpdatePriority.DeleteLabelValues(gs.Name, gs.Namespace)
}

// recordGssWhenChange publishes the replica counts found in a GameServerSet's
// status on the GameServerSetsReplicasCount gauge, one series per status kind
// ("current", "ready", "available", "maintaining", "waitToBeDeleted").
//
// MaintainingReplicas and WaitToBeDeletedReplicas are pointers in the status;
// when either is nil it is reported as 0 instead of being dereferenced, which
// would panic inside the informer's event handler. Objects that are not
// *GameServerSet are ignored.
func (c *Controller) recordGssWhenChange(obj interface{}) {
	gss, ok := obj.(*gamekruisev1alpha1.GameServerSet)
	if !ok {
		return
	}

	var maintaining, waitToBeDeleted float64
	if p := gss.Status.MaintainingReplicas; p != nil {
		maintaining = float64(*p)
	}
	if p := gss.Status.WaitToBeDeletedReplicas; p != nil {
		waitToBeDeleted = float64(*p)
	}

	GameServerSetsReplicasCount.WithLabelValues(gss.Name, gss.Namespace, "current").Set(float64(gss.Status.CurrentReplicas))
	GameServerSetsReplicasCount.WithLabelValues(gss.Name, gss.Namespace, "ready").Set(float64(gss.Status.ReadyReplicas))
	GameServerSetsReplicasCount.WithLabelValues(gss.Name, gss.Namespace, "available").Set(float64(gss.Status.AvailableReplicas))
	GameServerSetsReplicasCount.WithLabelValues(gss.Name, gss.Namespace, "maintaining").Set(maintaining)
	GameServerSetsReplicasCount.WithLabelValues(gss.Name, gss.Namespace, "waitToBeDeleted").Set(waitToBeDeleted)
}

// recordGssWhenDelete is the informer DeleteFunc for GameServerSets. It
// removes every replica-count series that recordGssWhenChange publishes for
// the deleted set. The notification may carry either the *GameServerSet or a
// cache.DeletedFinalStateUnknown tombstone wrapping it; both are handled so
// that series of sets whose delete event was missed do not linger. Anything
// else is ignored.
func (c *Controller) recordGssWhenDelete(obj interface{}) {
	gss, ok := obj.(*gamekruisev1alpha1.GameServerSet)
	if !ok {
		tombstone, isTombstone := obj.(cache.DeletedFinalStateUnknown)
		if !isTombstone {
			return
		}
		gss, ok = tombstone.Obj.(*gamekruisev1alpha1.GameServerSet)
		if !ok {
			return
		}
	}

	// Must list the same gsStatus label values that recordGssWhenChange sets.
	for _, status := range []string{"current", "ready", "available", "maintaining", "waitToBeDeleted"} {
		GameServerSetsReplicasCount.DeleteLabelValues(gss.Name, gss.Namespace, status)
	}
}

// Run waits until both informer caches have synced and then blocks until ctx
// is cancelled. It returns an error only when the caches fail to sync (for
// example because ctx was cancelled first); otherwise it returns nil once ctx
// is done.
func (c *Controller) Run(ctx context.Context) error {
	stopCh := ctx.Done()

	klog.Info("Wait for metrics controller cache sync")
	synced := cache.WaitForCacheSync(stopCh, c.gameServerSynced, c.gameServerSetSynced)
	if !synced {
		return errors.New("failed to wait for caches to sync")
	}

	// All work happens in the informer event handlers; just stay alive.
	<-stopCh
	return nil
}

// calcDurationState maintains the per-GameServer record of when its state
// last changed and returns the seconds elapsed since that record.
//
// Times are expressed as the server's age in seconds (now minus its creation
// timestamp). The action selects the bookkeeping:
//   - "Add":    store the current age and return 0.
//   - "Delete": return current age minus the stored age, then forget the server.
//   - any other value: return current age minus the stored age and store the
//     current age as the new reference.
//
// A server with no stored age is treated as having a stored age of 0.
func (c *Controller) calcDurationState(newGs *gamekruisev1alpha1.GameServer, action string) float64 {
	key := fmt.Sprintf("%s/%s", newGs.Namespace, newGs.Name)
	age := time.Since(newGs.CreationTimestamp.Time).Seconds()

	c.stateLock.Lock()
	defer c.stateLock.Unlock()

	switch action {
	case "Add":
		c.gameServerStateLastChange[key] = age
		return 0.0
	case "Delete":
		elapsed := age - c.gameServerStateLastChange[key]
		delete(c.gameServerStateLastChange, key)
		return elapsed
	default:
		elapsed := age - c.gameServerStateLastChange[key]
		c.gameServerStateLastChange[key] = age
		return elapsed
	}
}

// calcDurationOpsState maintains the per-GameServer record of when its
// opsState last changed and returns the seconds elapsed since that record.
//
// Times are expressed as the server's age in seconds (now minus its creation
// timestamp). The action selects the bookkeeping:
//   - "Add":    store the current age and return 0.
//   - "Delete": return current age minus the stored age, then forget the server.
//   - any other value: return current age minus the stored age and store the
//     current age as the new reference.
//
// A server with no stored age is treated as having a stored age of 0.
func (c *Controller) calcDurationOpsState(newGs *gamekruisev1alpha1.GameServer, action string) float64 {
	key := fmt.Sprintf("%s/%s", newGs.Namespace, newGs.Name)
	age := time.Since(newGs.CreationTimestamp.Time).Seconds()

	c.opsStateLock.Lock()
	defer c.opsStateLock.Unlock()

	switch action {
	case "Add":
		c.gameServerOpsStateLastChange[key] = age
		return 0.0
	case "Delete":
		elapsed := age - c.gameServerOpsStateLastChange[key]
		delete(c.gameServerOpsStateLastChange, key)
		return elapsed
	default:
		elapsed := age - c.gameServerOpsStateLastChange[key]
		c.gameServerOpsStateLastChange[key] = age
		return elapsed
	}
}
92 changes: 92 additions & 0 deletions pkg/metrics/prometheus_metrics.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
/*
Copyright 2023 The Kruise Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package metrics

import (
"github.com/prometheus/client_golang/prometheus"
"sigs.k8s.io/controller-runtime/pkg/metrics"
)

// init registers every collector declared in this package with the
// controller-runtime metrics registry. MustRegister panics if a registration
// fails.
func init() {
	metrics.Registry.MustRegister(
		GameServersStateCount,
		GameServersOpsStateCount,
		GameServersTotal,
		GameServerSetsReplicasCount,
		GameServersStateDuration,
		GameServersOpsStateDuration,
		GameServerDeletionPriority,
		GameServerUpdatePriority,
	)
}

// Prometheus collectors exported by the metrics controller. Metric names and
// label names are part of the scrape contract and are left untouched; only the
// Help texts were corrected (several ended with a stray ")").
var (
	// GameServersStateCount is the number of gameservers, labelled by state.
	GameServersStateCount = prometheus.NewGaugeVec(
		prometheus.GaugeOpts{
			Name: "okg_gameservers_state_count",
			Help: "The number of gameservers per state",
		},
		[]string{"state"},
	)
	// GameServersOpsStateCount is the number of gameservers, labelled by opsState.
	GameServersOpsStateCount = prometheus.NewGaugeVec(
		prometheus.GaugeOpts{
			Name: "okg_gameservers_opsState_count",
			Help: "The number of gameservers per opsState",
		},
		[]string{"opsState"},
	)
	// GameServersTotal is a label-less counter of gameservers.
	GameServersTotal = prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Name: "okg_gameservers_total",
			Help: "The total of gameservers",
		},
		[]string{},
	)
	// GameServerSetsReplicasCount is the replica count of each gameserverset,
	// labelled by set name, set namespace and replica status kind.
	GameServerSetsReplicasCount = prometheus.NewGaugeVec(
		prometheus.GaugeOpts{
			Name: "okg_gameserversets_replicas_count",
			Help: "The number of replicas per gameserverset",
		},
		[]string{"gssName", "gssNs", "gsStatus"},
	)
	// GameServersStateDuration is the distribution of time, in seconds, that
	// gameservers spend in a state.
	// NOTE(review): no Buckets are set, so the client library defaults apply;
	// confirm they suit the expected state durations.
	GameServersStateDuration = prometheus.NewHistogramVec(
		prometheus.HistogramOpts{
			Name: "okg_gameservers_state_duration",
			Help: "The distribution of gameserver state duration in seconds.",
		},
		[]string{"gsName", "gsNs", "state"},
	)
	// GameServersOpsStateDuration is the distribution of time, in seconds,
	// that gameservers spend in an opsState.
	// NOTE(review): no Buckets are set, so the client library defaults apply;
	// confirm they suit the expected opsState durations.
	GameServersOpsStateDuration = prometheus.NewHistogramVec(
		prometheus.HistogramOpts{
			Name: "okg_gameservers_opsState_duration",
			Help: "The distribution of gameserver opsState duration in seconds.",
		},
		[]string{"gsName", "gsNs", "opsState"},
	)
	// GameServerDeletionPriority is the deletionPriority of each gameserver.
	GameServerDeletionPriority = prometheus.NewGaugeVec(
		prometheus.GaugeOpts{
			Name: "okg_gameserver_deletion_priority",
			Help: "The deletionPriority of gameserver.",
		},
		[]string{"gsName", "gsNs"},
	)
	// GameServerUpdatePriority is the updatePriority of each gameserver.
	GameServerUpdatePriority = prometheus.NewGaugeVec(
		prometheus.GaugeOpts{
			Name: "okg_gameserver_update_priority",
			Help: "The updatePriority of gameserver.",
		},
		[]string{"gsName", "gsNs"},
	)
)

0 comments on commit 12a4395

Please sign in to comment.