Skip to content

Commit

Permalink
Add telemetry collection of deployment replica count (#1551)
Browse files Browse the repository at this point in the history
Problem: Want to collect deployment replica count as a telemetry datapoint.

Solution: Collect deployment replica count.
  • Loading branch information
bjee19 authored Feb 13, 2024
1 parent dca4d64 commit 25ea723
Show file tree
Hide file tree
Showing 7 changed files with 295 additions and 12 deletions.
1 change: 1 addition & 0 deletions cmd/gateway/commands.go
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,7 @@ func createStaticModeCommand() *cobra.Command {
PodIP: podIP,
ServiceName: serviceName.value,
Namespace: namespace,
Name: podName,
},
HealthConfig: config.HealthConfig{
Enabled: !disableHealth,
Expand Down
23 changes: 20 additions & 3 deletions deploy/helm-chart/templates/rbac.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,19 +21,36 @@ rules:
- namespaces
- services
- secrets
# FIXME(bjee19): make nodes permission dependent on telemetry being enabled.
# https://github.com/nginxinc/nginx-gateway-fabric/issues/1317.
- nodes
verbs:
- list
- watch
# FIXME(bjee19): make nodes, pods, replicasets permission dependent on telemetry being enabled.
# https://github.com/nginxinc/nginx-gateway-fabric/issues/1317.
- apiGroups:
- ""
resources:
- pods
verbs:
- get
- apiGroups:
- ""
resources:
- nodes
verbs:
- list
- apiGroups:
- ""
resources:
- events
verbs:
- create
- patch
- apiGroups:
- apps
resources:
- replicasets
verbs:
- get
- apiGroups:
- discovery.k8s.io
resources:
Expand Down
23 changes: 20 additions & 3 deletions deploy/manifests/nginx-gateway.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,19 +32,36 @@ rules:
- namespaces
- services
- secrets
# FIXME(bjee19): make nodes permission dependent on telemetry being enabled.
# https://github.com/nginxinc/nginx-gateway-fabric/issues/1317.
- nodes
verbs:
- list
- watch
# FIXME(bjee19): make nodes, pods, replicasets permission dependent on telemetry being enabled.
# https://github.com/nginxinc/nginx-gateway-fabric/issues/1317.
- apiGroups:
- ""
resources:
- pods
verbs:
- get
- apiGroups:
- ""
resources:
- nodes
verbs:
- list
- apiGroups:
- ""
resources:
- events
verbs:
- create
- patch
- apiGroups:
- apps
resources:
- replicasets
verbs:
- get
- apiGroups:
- discovery.k8s.io
resources:
Expand Down
2 changes: 2 additions & 0 deletions internal/mode/static/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@ type GatewayPodConfig struct {
ServiceName string
// Namespace is the namespace of this Pod.
Namespace string
// Name is the name of the Pod.
Name string
}

// MetricsConfig specifies the metrics config.
Expand Down
8 changes: 7 additions & 1 deletion internal/mode/static/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"github.com/go-logr/logr"
ngxclient "github.com/nginxinc/nginx-plus-go-client/client"
"github.com/prometheus/client_golang/prometheus"
appsv1 "k8s.io/api/apps/v1"
apiv1 "k8s.io/api/core/v1"
discoveryV1 "k8s.io/api/discovery/v1"
apiext "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
Expand Down Expand Up @@ -65,6 +66,7 @@ func init() {
utilruntime.Must(discoveryV1.AddToScheme(scheme))
utilruntime.Must(ngfAPI.AddToScheme(scheme))
utilruntime.Must(apiext.AddToScheme(scheme))
utilruntime.Must(appsv1.AddToScheme(scheme))
}

// nolint:gocyclo
Expand Down Expand Up @@ -214,10 +216,14 @@ func StartManager(cfg config.Config) error {
}

dataCollector := telemetry.NewDataCollectorImpl(telemetry.DataCollectorConfig{
K8sClientReader: mgr.GetClient(),
K8sClientReader: mgr.GetAPIReader(),
GraphGetter: processor,
ConfigurationGetter: eventHandler,
Version: cfg.Version,
PodNSName: types.NamespacedName{
Namespace: cfg.GatewayPodConfig.Namespace,
Name: cfg.GatewayPodConfig.Name,
},
})
if err = mgr.Add(createTelemetryJob(cfg, dataCollector, nginxChecker.getReadyCh())); err != nil {
return fmt.Errorf("cannot register telemetry job: %w", err)
Expand Down
48 changes: 47 additions & 1 deletion internal/mode/static/telemetry/collector.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@ import (
"errors"
"fmt"

appsv1 "k8s.io/api/apps/v1"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/types"
"sigs.k8s.io/controller-runtime/pkg/client"

"github.com/nginxinc/nginx-gateway-fabric/internal/mode/static/state/dataplane"
Expand Down Expand Up @@ -49,6 +51,7 @@ type Data struct {
ProjectMetadata ProjectMetadata
NodeCount int
NGFResourceCounts NGFResourceCounts
NGFReplicaCount int
}

// DataCollectorConfig holds configuration parameters for DataCollectorImpl.
Expand All @@ -61,6 +64,8 @@ type DataCollectorConfig struct {
ConfigurationGetter ConfigurationGetter
// Version is the NGF version.
Version string
// PodNSName is the NamespacedName of the NGF Pod.
PodNSName types.NamespacedName
}

// DataCollectorImpl is an implementation of DataCollector.
Expand Down Expand Up @@ -89,13 +94,19 @@ func (c DataCollectorImpl) Collect(ctx context.Context) (Data, error) {
return Data{}, fmt.Errorf("failed to collect NGF resource counts: %w", err)
}

ngfReplicaCount, err := collectNGFReplicaCount(ctx, c.cfg.K8sClientReader, c.cfg.PodNSName)
if err != nil {
return Data{}, fmt.Errorf("failed to collect NGF replica count: %w", err)
}

data := Data{
NodeCount: nodeCount,
NGFResourceCounts: graphResourceCount,
ProjectMetadata: ProjectMetadata{
Name: "NGF",
Version: c.cfg.Version,
},
NGFReplicaCount: ngfReplicaCount,
}

return data, nil
Expand All @@ -104,7 +115,7 @@ func (c DataCollectorImpl) Collect(ctx context.Context) (Data, error) {
func collectNodeCount(ctx context.Context, k8sClient client.Reader) (int, error) {
var nodes v1.NodeList
if err := k8sClient.List(ctx, &nodes); err != nil {
return 0, err
return 0, fmt.Errorf("failed to get NodeList: %w", err)
}

return len(nodes.Items), nil
Expand Down Expand Up @@ -147,3 +158,38 @@ func collectGraphResourceCount(

return ngfResourceCounts, nil
}

// collectNGFReplicaCount reports the replica count declared in the spec of the
// ReplicaSet that owns the NGF Pod.
//
// It fetches the Pod identified by podNSName, requires that Pod to have exactly
// one owner reference whose kind is ReplicaSet, fetches the ReplicaSet with that
// name from the Pod's namespace, and returns its Spec.Replicas value.
// Any failed lookup or unmet expectation yields a zero count and an error.
func collectNGFReplicaCount(
	ctx context.Context,
	k8sClient client.Reader,
	podNSName types.NamespacedName,
) (int, error) {
	var ngfPod v1.Pod
	if err := k8sClient.Get(ctx, podNSName, &ngfPod); err != nil {
		return 0, fmt.Errorf("failed to get NGF Pod: %w", err)
	}

	// The count check comes first, so indexing owners[0] in the second case is safe.
	owners := ngfPod.GetOwnerReferences()
	switch {
	case len(owners) != 1:
		return 0, fmt.Errorf("expected one owner reference of the NGF Pod, got %d", len(owners))
	case owners[0].Kind != "ReplicaSet":
		return 0, fmt.Errorf("expected pod owner reference to be ReplicaSet, got %s", owners[0].Kind)
	}

	// The owner reference carries only a name; the ReplicaSet is looked up in the Pod's namespace.
	rsKey := types.NamespacedName{
		Namespace: podNSName.Namespace,
		Name:      owners[0].Name,
	}

	var owningRS appsv1.ReplicaSet
	if err := k8sClient.Get(ctx, rsKey, &owningRS); err != nil {
		return 0, fmt.Errorf("failed to get NGF Pod's ReplicaSet: %w", err)
	}

	replicas := owningRS.Spec.Replicas
	if replicas == nil {
		return 0, errors.New("replica set replicas was nil")
	}

	return int(*replicas), nil
}
Loading

0 comments on commit 25ea723

Please sign in to comment.