Skip to content

Commit

Permalink
Add controller performance metrics (#391)
Browse files Browse the repository at this point in the history
* Added GameServer control performance metrics;
Updated Grafana dashboard

* Update monitoring documentation;
Refactor non-active handling

* Update yaml

* Fix capitalization

* Revert extra changes triggering installfile alert

* Handle dereferencing;
Add util function;
Remove repeated lines

* Add pointers

* Decreasing time diff

* PR Updates;
Rename deleteSum;
Fix GS state update;

* Add patching exception

* Change metric emission to nodeagent

* Update dashboard

* Revert test

* Cleanup deletes

* Minor tweaks

* Conditional

* PR Suggested changes

* Remove spacing added to gameserverbuild

* Remove empty line in nodeagent

* Renaming

* Update dashboard

* Remove metric

Co-authored-by: Dimitris-Ilias Gkanatsios <[email protected]>
  • Loading branch information
dsmith111 and dgkanatsios authored Sep 26, 2022
1 parent e038492 commit 15336f7
Show file tree
Hide file tree
Showing 6 changed files with 1,006 additions and 695 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -23,5 +23,8 @@ installfilesdev

.uptodate

# vscode settings
.vscode

# allocator compiled plugin
kubectl-gameserver
4 changes: 4 additions & 0 deletions cmd/nodeagent/nodeagentmanager.go
Original file line number Diff line number Diff line change
Expand Up @@ -452,8 +452,12 @@ func (n *NodeAgentManager) updateHealthAndStateIfNeeded(ctx context.Context, hb
now := metav1.Time{Time: n.nowFunc()}
if hb.CurrentGameState == GameStateInitializing {
status.ReachedInitializingOn = &now
timeDif := time.Now().UnixMilli() - gsd.CreationTime
GameServerReachedInitializingDuration.WithLabelValues(gsd.BuildName).Set(float64(timeDif))
} else if hb.CurrentGameState == GameStateStandingBy {
status.ReachedStandingByOn = &now
timeDif := time.Now().UnixMilli() - gsd.CreationTime
GameServerReachedStandingByDuration.WithLabelValues(gsd.BuildName).Set(float64(timeDif))
}
}

Expand Down
18 changes: 18 additions & 0 deletions cmd/nodeagent/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,24 @@ var (
Name: "connected_players",
Help: "Number of connected players per GameServer",
}, []string{"namespace", "ServerName", "BuildName"})

// GameServerReachedStandingByDuration is a Prometheus gauge vector, registered
// via promauto under the "thundernetes" namespace, that reports the time taken
// for a GameServer to reach StandingBy. It carries a single label, "BuildName".
// NOTE(review): the nodeagent sets this to time.Now().UnixMilli() minus the
// GameServer's CreationTime, so the value is presumably in milliseconds —
// confirm the unit of CreationTime.
GameServerReachedStandingByDuration = promauto.NewGaugeVec(
prometheus.GaugeOpts{
Namespace: "thundernetes",
Name: "gameserver_standing_by_duration",
Help: "Time taken for a GameServer to reach StandingBy",
},
// One time series per GameServerBuild name.
[]string{"BuildName"},
)

// GameServerReachedInitializingDuration is a Prometheus gauge vector, registered
// via promauto under the "thundernetes" namespace, that reports the time taken
// for a GameServer to reach initialization. It carries a single label,
// "BuildName".
// NOTE(review): the nodeagent sets this to time.Now().UnixMilli() minus the
// GameServer's CreationTime, so the value is presumably in milliseconds —
// confirm the unit of CreationTime.
GameServerReachedInitializingDuration = promauto.NewGaugeVec(
prometheus.GaugeOpts{
Namespace: "thundernetes",
Name: "gameserver_initialization_duration",
Help: "Time taken for a GameServer to reach initialization",
},
// One time series per GameServerBuild name.
[]string{"BuildName"},
)
)

// HeartbeatRequest contains data for the heartbeat request coming from the GSDK running alongside GameServer
Expand Down
2 changes: 2 additions & 0 deletions docs/howtos/monitoring.md
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,8 @@ There is a custom Grafana dashboard example that visualizes some of this data in
| --- | --- | --- |
| gameserver_states | Gauge | nodeagent |
| connected_players | Gauge | nodeagent |
| gameserver_initialization_duration | Gauge | nodeagent |
| gameserver_standing_by_duration | Gauge | nodeagent |
| gameservers_current_state_per_build | Gauge | controller-manager |
| gameservers_created_total | Counter | controller-manager |
| gameservers_sessionended_total | Counter | controller-manager |
Expand Down
15 changes: 7 additions & 8 deletions pkg/operator/controllers/gameserverbuild_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -183,19 +183,18 @@ func (r *GameServerBuildReconciler) Reconcile(ctx context.Context, req ctrl.Requ
// calculate the total amount of servers not in the active state
nonActiveGameServersCount := standingByCount + initializingCount + pendingCount

// Evaluate desired number of servers against actual
var totalNumberOfGameServersToDelete int = 0
// user has decreased standingBy numbers
if nonActiveGameServersCount > gsb.Spec.StandingBy {
totalNumberOfGameServersToDelete := int(math.Min(float64(nonActiveGameServersCount-gsb.Spec.StandingBy), maxNumberOfGameServersToDelete))
err := r.deleteNonActiveGameServers(ctx, &gsb, &gameServers, totalNumberOfGameServersToDelete)
if err != nil {
return ctrl.Result{}, err
}
totalNumberOfGameServersToDelete += int(math.Min(float64(nonActiveGameServersCount-gsb.Spec.StandingBy), maxNumberOfGameServersToDelete))
}

// we need to check if we are above the max
// we also need to check if we are above the max
// this can happen if the user modifies the spec.Max during the GameServerBuild's lifetime
if nonActiveGameServersCount+activeCount > gsb.Spec.Max {
totalNumberOfGameServersToDelete := int(math.Min(float64(nonActiveGameServersCount+activeCount-gsb.Spec.Max), maxNumberOfGameServersToDelete))
totalNumberOfGameServersToDelete += int(math.Min(float64(totalNumberOfGameServersToDelete+(nonActiveGameServersCount+activeCount-gsb.Spec.Max)), maxNumberOfGameServersToDelete))
}
if totalNumberOfGameServersToDelete > 0 {
err := r.deleteNonActiveGameServers(ctx, &gsb, &gameServers, totalNumberOfGameServersToDelete)
if err != nil {
return ctrl.Result{}, err
Expand Down
Loading

0 comments on commit 15336f7

Please sign in to comment.