diff --git a/cmd/gateway/commands.go b/cmd/gateway/commands.go index 4d8fb8ce9..ac97185fd 100644 --- a/cmd/gateway/commands.go +++ b/cmd/gateway/commands.go @@ -153,6 +153,7 @@ func createStaticModeCommand() *cobra.Command { PodIP: podIP, ServiceName: serviceName.value, Namespace: namespace, + Name: podName, }, HealthConfig: config.HealthConfig{ Enabled: !disableHealth, diff --git a/deploy/helm-chart/templates/rbac.yaml b/deploy/helm-chart/templates/rbac.yaml index 97963ed1b..f539c2805 100644 --- a/deploy/helm-chart/templates/rbac.yaml +++ b/deploy/helm-chart/templates/rbac.yaml @@ -21,12 +21,23 @@ rules: - namespaces - services - secrets - # FIXME(bjee19): make nodes permission dependent on telemetry being enabled. - # https://github.com/nginxinc/nginx-gateway-fabric/issues/1317. - - nodes verbs: - list - watch +# FIXME(bjee19): make nodes, pods, replicasets permission dependent on telemetry being enabled. +# https://github.com/nginxinc/nginx-gateway-fabric/issues/1317. +- apiGroups: + - "" + resources: + - pods + verbs: + - get +- apiGroups: + - "" + resources: + - nodes + verbs: + - list - apiGroups: - "" resources: @@ -34,6 +45,12 @@ rules: verbs: - create - patch +- apiGroups: + - apps + resources: + - replicasets + verbs: + - get - apiGroups: - discovery.k8s.io resources: diff --git a/deploy/manifests/nginx-gateway.yaml b/deploy/manifests/nginx-gateway.yaml index de2c2c1b1..578e4950b 100644 --- a/deploy/manifests/nginx-gateway.yaml +++ b/deploy/manifests/nginx-gateway.yaml @@ -32,12 +32,23 @@ rules: - namespaces - services - secrets - # FIXME(bjee19): make nodes permission dependent on telemetry being enabled. - # https://github.com/nginxinc/nginx-gateway-fabric/issues/1317. - - nodes verbs: - list - watch +# FIXME(bjee19): make nodes, pods, replicasets permission dependent on telemetry being enabled. +# https://github.com/nginxinc/nginx-gateway-fabric/issues/1317. +- apiGroups: + - "" + resources: + - pods + verbs: + - get +- apiGroups: + - "" + resources: + - nodes + verbs: + - list - apiGroups: - "" resources: @@ -45,6 +56,12 @@ rules: verbs: - create - patch +- apiGroups: + - apps + resources: + - replicasets + verbs: + - get - apiGroups: - discovery.k8s.io resources: diff --git a/internal/mode/static/config/config.go b/internal/mode/static/config/config.go index abbec1faa..cb1ee1efb 100644 --- a/internal/mode/static/config/config.go +++ b/internal/mode/static/config/config.go @@ -48,6 +48,8 @@ type GatewayPodConfig struct { ServiceName string // Namespace is the namespace of this Pod. Namespace string + // Name is the name of the Pod. + Name string } // MetricsConfig specifies the metrics config. diff --git a/internal/mode/static/manager.go b/internal/mode/static/manager.go index 712bb6c46..5967190cb 100644 --- a/internal/mode/static/manager.go +++ b/internal/mode/static/manager.go @@ -8,6 +8,7 @@ import ( "github.com/go-logr/logr" ngxclient "github.com/nginxinc/nginx-plus-go-client/client" "github.com/prometheus/client_golang/prometheus" + appsv1 "k8s.io/api/apps/v1" apiv1 "k8s.io/api/core/v1" discoveryV1 "k8s.io/api/discovery/v1" apiext "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" @@ -65,6 +66,7 @@ func init() { utilruntime.Must(discoveryV1.AddToScheme(scheme)) utilruntime.Must(ngfAPI.AddToScheme(scheme)) utilruntime.Must(apiext.AddToScheme(scheme)) + utilruntime.Must(appsv1.AddToScheme(scheme)) } // nolint:gocyclo @@ -214,10 +216,14 @@ func StartManager(cfg config.Config) error { } dataCollector := telemetry.NewDataCollectorImpl(telemetry.DataCollectorConfig{ - K8sClientReader: mgr.GetClient(), + K8sClientReader: mgr.GetAPIReader(), GraphGetter: processor, ConfigurationGetter: eventHandler, Version: cfg.Version, + PodNSName: types.NamespacedName{ + Namespace: cfg.GatewayPodConfig.Namespace, + Name: cfg.GatewayPodConfig.Name, + }, }) if err = mgr.Add(createTelemetryJob(cfg, dataCollector, nginxChecker.getReadyCh())); err != nil { return fmt.Errorf("cannot register telemetry job: %w", err) diff --git a/internal/mode/static/telemetry/collector.go b/internal/mode/static/telemetry/collector.go index 416b004ee..6842b5b62 100644 --- a/internal/mode/static/telemetry/collector.go +++ b/internal/mode/static/telemetry/collector.go @@ -5,7 +5,9 @@ import ( "errors" "fmt" + appsv1 "k8s.io/api/apps/v1" v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/types" "sigs.k8s.io/controller-runtime/pkg/client" "github.com/nginxinc/nginx-gateway-fabric/internal/mode/static/state/dataplane" @@ -49,6 +51,7 @@ type Data struct { ProjectMetadata ProjectMetadata NodeCount int NGFResourceCounts NGFResourceCounts + NGFReplicaCount int } // DataCollectorConfig holds configuration parameters for DataCollectorImpl. @@ -61,6 +64,8 @@ type DataCollectorConfig struct { ConfigurationGetter ConfigurationGetter // Version is the NGF version. Version string + // PodNSName is the NamespacedName of the NGF Pod. + PodNSName types.NamespacedName } // DataCollectorImpl is am implementation of DataCollector. @@ -89,6 +94,11 @@ func (c DataCollectorImpl) Collect(ctx context.Context) (Data, error) { return Data{}, fmt.Errorf("failed to collect NGF resource counts: %w", err) } + ngfReplicaCount, err := collectNGFReplicaCount(ctx, c.cfg.K8sClientReader, c.cfg.PodNSName) + if err != nil { + return Data{}, fmt.Errorf("failed to collect NGF replica count: %w", err) + } + data := Data{ NodeCount: nodeCount, NGFResourceCounts: graphResourceCount, @@ -96,6 +106,7 @@ func (c DataCollectorImpl) Collect(ctx context.Context) (Data, error) { Name: "NGF", Version: c.cfg.Version, }, + NGFReplicaCount: ngfReplicaCount, } return data, nil @@ -104,7 +115,7 @@ func (c DataCollectorImpl) Collect(ctx context.Context) (Data, error) { func collectNodeCount(ctx context.Context, k8sClient client.Reader) (int, error) { var nodes v1.NodeList if err := k8sClient.List(ctx, &nodes); err != nil { - return 0, err + return 0, fmt.Errorf("failed to get NodeList: %w", err) } return len(nodes.Items), nil @@ -147,3 +158,38 @@ func collectGraphResourceCount( return ngfResourceCounts, nil } + +func collectNGFReplicaCount(ctx context.Context, k8sClient client.Reader, podNSName types.NamespacedName) (int, error) { + var pod v1.Pod + if err := k8sClient.Get( + ctx, + types.NamespacedName{Namespace: podNSName.Namespace, Name: podNSName.Name}, + &pod, + ); err != nil { + return 0, fmt.Errorf("failed to get NGF Pod: %w", err) + } + + podOwnerRefs := pod.GetOwnerReferences() + if len(podOwnerRefs) != 1 { + return 0, fmt.Errorf("expected one owner reference of the NGF Pod, got %d", len(podOwnerRefs)) + } + + if podOwnerRefs[0].Kind != "ReplicaSet" { + return 0, fmt.Errorf("expected pod owner reference to be ReplicaSet, got %s", podOwnerRefs[0].Kind) + } + + var replicaSet appsv1.ReplicaSet + if err := k8sClient.Get( + ctx, + types.NamespacedName{Namespace: podNSName.Namespace, Name: podOwnerRefs[0].Name}, + &replicaSet, + ); err != nil { + return 0, fmt.Errorf("failed to get NGF Pod's ReplicaSet: %w", err) + } + + if replicaSet.Spec.Replicas == nil { + return 0, errors.New("replica set replicas was nil") + } + + return int(*replicaSet.Spec.Replicas), nil +} diff --git a/internal/mode/static/telemetry/collector_test.go b/internal/mode/static/telemetry/collector_test.go index e30eb1376..a2afb16ff 100644 --- a/internal/mode/static/telemetry/collector_test.go +++ b/internal/mode/static/telemetry/collector_test.go @@ -4,9 +4,11 @@ import ( "context" "errors" "fmt" + "reflect" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" + appsv1 "k8s.io/api/apps/v1" v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" @@ -39,6 +41,27 @@ func createListCallsFunc(nodes []v1.Node) func( } } +func createGetCallsFunc(objects ...client.Object) func( + context.Context, + types.NamespacedName, + client.Object, + ...client.GetOption, +) error { + return func(_ context.Context, _ types.NamespacedName, object client.Object, option ...client.GetOption) error { + Expect(option).To(BeEmpty()) + + for _, obj := range objects { + if reflect.TypeOf(obj) == reflect.TypeOf(object) { + reflect.ValueOf(object).Elem().Set(reflect.ValueOf(obj).Elem()) + return nil + } + } + + Fail(fmt.Sprintf("unknown type: %T", object)) + return nil + } +} + var _ = Describe("Collector", Ordered, func() { var ( k8sClientReader *eventsfakes.FakeReader @@ -48,11 +71,38 @@ var _ = Describe("Collector", Ordered, func() { version string expData telemetry.Data ctx context.Context + podNSName types.NamespacedName + ngfPod *v1.Pod + ngfReplicaSet *appsv1.ReplicaSet ) BeforeAll(func() { ctx = context.Background() version = "1.1" + + ngfPod = &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "pod1", + OwnerReferences: []metav1.OwnerReference{ + { + Kind: "ReplicaSet", + Name: "replicaset1", + }, + }, + }, + } + + replicas := int32(1) + ngfReplicaSet = &appsv1.ReplicaSet{ + Spec: appsv1.ReplicaSetSpec{ + Replicas: &replicas, + }, + } + + podNSName = types.NamespacedName{ + Namespace: "nginx-gateway", + Name: "ngf-pod", + } }) BeforeEach(func() { @@ -60,6 +110,7 @@ var _ = Describe("Collector", Ordered, func() { ProjectMetadata: telemetry.ProjectMetadata{Name: "NGF", Version: version}, NodeCount: 0, NGFResourceCounts: telemetry.NGFResourceCounts{}, + NGFReplicaCount: 1, } k8sClientReader = &eventsfakes.FakeReader{} @@ -74,7 +125,9 @@ var _ = Describe("Collector", Ordered, func() { GraphGetter: fakeGraphGetter, ConfigurationGetter: fakeConfigurationGetter, Version: version, + PodNSName: podNSName, }) + k8sClientReader.GetCalls(createGetCallsFunc(ngfPod, ngfReplicaSet)) }) Describe("Normal case", func() { @@ -216,10 +269,11 @@ var _ = Describe("Collector", Ordered, func() { }) When("it encounters an error while collecting data", func() { It("should error on kubernetes client api errors", func() { - k8sClientReader.ListReturns(errors.New("there was an error")) + expectedError := errors.New("there was an error getting NodeList") + k8sClientReader.ListReturns(expectedError) _, err := dataCollector.Collect(ctx) - Expect(err).To(HaveOccurred()) + Expect(err).To(MatchError(expectedError)) }) }) }) @@ -348,17 +402,157 @@ var _ = Describe("Collector", Ordered, func() { fakeConfigurationGetter.GetLatestConfigurationReturns(&dataplane.Configuration{}) }) It("should error on nil latest graph", func() { + expectedError := errors.New("latest graph cannot be nil") fakeGraphGetter.GetLatestGraphReturns(nil) _, err := dataCollector.Collect(ctx) - Expect(err).To(HaveOccurred()) + Expect(err).To(MatchError(expectedError)) }) It("should error on nil latest configuration", func() { + expectedError := errors.New("latest configuration cannot be nil") fakeConfigurationGetter.GetLatestConfigurationReturns(nil) _, err := dataCollector.Collect(ctx) - Expect(err).To(HaveOccurred()) + Expect(err).To(MatchError(expectedError)) + }) + }) + }) + }) + + Describe("NGF replica count collector", func() { + When("collecting NGF replica count", func() { + When("it encounters an error while collecting data", func() { + It("should error if the kubernetes client errored when getting the Pod", func() { + expectedErr := errors.New("there was an error getting the Pod") + k8sClientReader.GetCalls( + func(_ context.Context, _ client.ObjectKey, object client.Object, option ...client.GetOption) error { + Expect(option).To(BeEmpty()) + + switch typedObj := object.(type) { + case *v1.Pod: + return expectedErr + default: + Fail(fmt.Sprintf("unknown type: %T", typedObj)) + } + return nil + }) + + _, err := dataCollector.Collect(ctx) + Expect(err).To(MatchError(expectedErr)) + }) + + It("should error if the Pod's owner reference is nil", func() { + expectedErr := errors.New("expected one owner reference of the NGF Pod, got 0") + k8sClientReader.GetCalls(createGetCallsFunc( + &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "pod1", + OwnerReferences: nil, + }, + }, + )) + + _, err := dataCollector.Collect(ctx) + Expect(err).To(MatchError(expectedErr)) + }) + + It("should error if the Pod has multiple owner references", func() { + expectedErr := errors.New("expected one owner reference of the NGF Pod, got 2") + k8sClientReader.GetCalls(createGetCallsFunc( + &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "pod1", + OwnerReferences: []metav1.OwnerReference{ + { + Kind: "ReplicaSet", + Name: "replicaset1", + }, + { + Kind: "ReplicaSet", + Name: "replicaset2", + }, + }, + }, + }, + )) + + _, err := dataCollector.Collect(ctx) + Expect(err).To(MatchError(expectedErr)) + }) + + It("should error if the Pod's owner reference is not a ReplicaSet", func() { + expectedErr := errors.New("expected pod owner reference to be ReplicaSet, got Deployment") + k8sClientReader.GetCalls(createGetCallsFunc( + &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "pod1", + OwnerReferences: []metav1.OwnerReference{ + { + Kind: "Deployment", + Name: "deployment1", + }, + }, + }, + }, + )) + + _, err := dataCollector.Collect(ctx) + Expect(err).To(MatchError(expectedErr)) + }) + + It("should error if the replica set's replicas is nil", func() { + expectedErr := errors.New("replica set replicas was nil") + k8sClientReader.GetCalls(createGetCallsFunc( + &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "pod1", + OwnerReferences: []metav1.OwnerReference{ + { + Kind: "ReplicaSet", + Name: "replicaset1", + }, + }, + }, + }, + &appsv1.ReplicaSet{ + Spec: appsv1.ReplicaSetSpec{ + Replicas: nil, + }, + }, + )) + + _, err := dataCollector.Collect(ctx) + Expect(err).To(MatchError(expectedErr)) + }) + + It("should error if the kubernetes client errored when getting the ReplicaSet", func() { + expectedErr := errors.New("there was an error getting the ReplicaSet") + k8sClientReader.GetCalls( + func(_ context.Context, _ client.ObjectKey, object client.Object, option ...client.GetOption) error { + Expect(option).To(BeEmpty()) + + switch typedObj := object.(type) { + case *v1.Pod: + typedObj.ObjectMeta = metav1.ObjectMeta{ + Name: "pod1", + OwnerReferences: []metav1.OwnerReference{ + { + Kind: "ReplicaSet", + Name: "replicaset1", + }, + }, + } + case *appsv1.ReplicaSet: + return expectedErr + default: + Fail(fmt.Sprintf("unknown type: %T", typedObj)) + } + return nil + }) + + _, err := dataCollector.Collect(ctx) + Expect(err).To(MatchError(expectedErr)) }) }) })