From 35dcb3dd2722a344bf1b4060c8f64fe6ddafadf2 Mon Sep 17 00:00:00 2001 From: Shashank Ram Date: Tue, 27 Jul 2021 17:26:19 -0700 Subject: [PATCH] smi/health: fix race condition leading to controller crash This change fixes a bug where a pkg scoped map is being concurrently modified to check if SMI API resources are available. Additionally, it refactors the code for clarity and better unit testing. Signed-off-by: Shashank Ram --- cmd/osm-controller/osm-controller.go | 2 +- pkg/ingress/client_test.go | 36 ++---- pkg/k8s/fake.go | 19 +++ pkg/smi/health.go | 59 +++++---- pkg/smi/health_test.go | 172 +++++++++++++++++++++++---- 5 files changed, 211 insertions(+), 77 deletions(-) create mode 100644 pkg/k8s/fake.go diff --git a/cmd/osm-controller/osm-controller.go b/cmd/osm-controller/osm-controller.go index d381a6e84e..2eaab226a7 100644 --- a/cmd/osm-controller/osm-controller.go +++ b/cmd/osm-controller/osm-controller.go @@ -251,7 +251,7 @@ func main() { clientset := extensionsClientset.NewForConfigOrDie(kubeConfig) // Health/Liveness probes - funcProbes := []health.Probes{xdsServer, smi.HealthChecker{SMIClientset: clientset}} + funcProbes := []health.Probes{xdsServer, smi.HealthChecker{DiscoveryClient: clientset.Discovery()}} httpServer.AddHandlers(map[string]http.Handler{ "/health/ready": health.ReadinessHandler(funcProbes, getHTTPHealthProbes()), "/health/alive": health.LivenessHandler(funcProbes, getHTTPHealthProbes()), diff --git a/pkg/ingress/client_test.go b/pkg/ingress/client_test.go index 9578883e0a..3d666655fd 100644 --- a/pkg/ingress/client_test.go +++ b/pkg/ingress/client_test.go @@ -23,18 +23,6 @@ import ( "github.com/openservicemesh/osm/pkg/service" ) -type fakeDiscoveryClient struct { - discovery.ServerResourcesInterface - resources map[string]metav1.APIResourceList - err error -} - -// ServerResourcesForGroupVersion returns the supported resources for a group and version. -func (f *fakeDiscoveryClient) ServerResourcesForGroupVersion(groupVersion string) (*metav1.APIResourceList, error) { - resp := f.resources[groupVersion] - return &resp, f.err -} - func TestGetSupportedIngressVersions(t *testing.T) { type testCase struct { name string @@ -46,8 +34,8 @@ func TestGetSupportedIngressVersions(t *testing.T) { testCases := []testCase{ { name: "k8s API server supports both ingress v1 and v1beta", - discoveryClient: &fakeDiscoveryClient{ - resources: map[string]metav1.APIResourceList{ + discoveryClient: &k8s.FakeDiscoveryClient{ + Resources: map[string]metav1.APIResourceList{ "networking.k8s.io/v1": {APIResources: []metav1.APIResource{ {Kind: "Ingress"}, {Kind: "NetworkPolicy"}, @@ -56,7 +44,7 @@ func TestGetSupportedIngressVersions(t *testing.T) { {Kind: "Ingress"}, }}, }, - err: nil, + Err: nil, }, expectedVersions: map[string]bool{ "networking.k8s.io/v1": true, @@ -66,8 +54,8 @@ func TestGetSupportedIngressVersions(t *testing.T) { }, { name: "k8s API server supports only ingress v1beta1", - discoveryClient: &fakeDiscoveryClient{ - resources: map[string]metav1.APIResourceList{ + discoveryClient: &k8s.FakeDiscoveryClient{ + Resources: map[string]metav1.APIResourceList{ "networking.k8s.io/v1": {APIResources: []metav1.APIResource{ {Kind: "NetworkPolicy"}, }}, @@ -75,7 +63,7 @@ func TestGetSupportedIngressVersions(t *testing.T) { {Kind: "Ingress"}, }}, }, - err: nil, + Err: nil, }, expectedVersions: map[string]bool{ "networking.k8s.io/v1": false, @@ -85,8 +73,8 @@ func TestGetSupportedIngressVersions(t *testing.T) { }, { name: "k8s API server supports only ingress v1", - discoveryClient: &fakeDiscoveryClient{ - resources: map[string]metav1.APIResourceList{ + discoveryClient: &k8s.FakeDiscoveryClient{ + Resources: map[string]metav1.APIResourceList{ "networking.k8s.io/v1": {APIResources: []metav1.APIResource{ {Kind: "Ingress"}, }}, @@ -94,7 +82,7 @@ func TestGetSupportedIngressVersions(t *testing.T) { {Kind: "NetworkPolicy"}, }}, }, - err: nil, + Err: nil, }, expectedVersions: map[string]bool{ "networking.k8s.io/v1": true, @@ -104,9 +92,9 @@ func TestGetSupportedIngressVersions(t *testing.T) { }, { name: "k8s API server returns an error", - discoveryClient: &fakeDiscoveryClient{ - resources: map[string]metav1.APIResourceList{}, - err: errors.New("fake error"), + discoveryClient: &k8s.FakeDiscoveryClient{ + Resources: map[string]metav1.APIResourceList{}, + Err: errors.New("fake error"), }, expectedVersions: nil, exepectError: true, diff --git a/pkg/k8s/fake.go b/pkg/k8s/fake.go new file mode 100644 index 0000000000..fc6f0c4530 --- /dev/null +++ b/pkg/k8s/fake.go @@ -0,0 +1,19 @@ +package k8s + +import ( + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/discovery" +) + +// FakeDiscoveryClient is a fake client for k8s API discovery +type FakeDiscoveryClient struct { + discovery.ServerResourcesInterface + Resources map[string]metav1.APIResourceList + Err error +} + +// ServerResourcesForGroupVersion returns the supported resources for a group and version. +func (f *FakeDiscoveryClient) ServerResourcesForGroupVersion(groupVersion string) (*metav1.APIResourceList, error) { + resp := f.Resources[groupVersion] + return &resp, f.Err +} diff --git a/pkg/smi/health.go b/pkg/smi/health.go index dc43f48ccf..5a99579084 100644 --- a/pkg/smi/health.go +++ b/pkg/smi/health.go @@ -4,62 +4,69 @@ import ( smiAccess "github.com/servicemeshinterface/smi-sdk-go/pkg/apis/access/v1alpha3" smiSpecs "github.com/servicemeshinterface/smi-sdk-go/pkg/apis/specs/v1alpha4" smiSplit "github.com/servicemeshinterface/smi-sdk-go/pkg/apis/split/v1alpha2" - extensionsClientset "k8s.io/apiextensions-apiserver/pkg/client/clientset/clientset" - - "github.com/openservicemesh/osm/pkg/version" + "k8s.io/client-go/discovery" ) var ( - // The key is the API Resource.Kind and the value is the SMI CRD Type - reqKinds = map[string]string{ - "HTTPRouteGroup": "specs", // Full CRD API Group: specs.smi-spec.io - "TCPRoute": "specs", // Full CRD API Group: specs.smi-spec.io - "TrafficSplit": "split", // Full CRD API Group: split.smi-spec.io - "TrafficTarget": "access", // Full CRD API Group: access.smi-spec.io + // requiredResourceKindGroupMap is a mapping of the required resource kind to it's API group + requiredResourceKindGroupMap = map[string]string{ + "HTTPRouteGroup": smiSpecs.SchemeGroupVersion.String(), + "TCPRoute": smiSpecs.SchemeGroupVersion.String(), + "TrafficSplit": smiSplit.SchemeGroupVersion.String(), + "TrafficTarget": smiAccess.SchemeGroupVersion.String(), + } + smiAPIGroupVersions = []string{ + smiSpecs.SchemeGroupVersion.String(), + smiAccess.SchemeGroupVersion.String(), + smiSpecs.SchemeGroupVersion.String(), + smiSplit.SchemeGroupVersion.String(), } - candidateVersions = []string{smiSpecs.SchemeGroupVersion.String(), smiAccess.SchemeGroupVersion.String(), smiSpecs.SchemeGroupVersion.String(), smiSplit.SchemeGroupVersion.String()} ) // HealthChecker has SMI clientset interface to access SMI CRDS type HealthChecker struct { - SMIClientset extensionsClientset.Interface + DiscoveryClient discovery.ServerResourcesInterface } // Liveness is the Kubernetes liveness probe handler. -func (smi HealthChecker) Liveness() bool { - return checkSMICrdsExist(smi.SMIClientset, reqKinds, candidateVersions) +func (c HealthChecker) Liveness() bool { + return c.requiredAPIResourcesExist() } // Readiness is the Kubernetes readiness probe handler. -func (smi HealthChecker) Readiness() bool { - return checkSMICrdsExist(smi.SMIClientset, reqKinds, candidateVersions) +func (c HealthChecker) Readiness() bool { + return c.requiredAPIResourcesExist() } // GetID returns the ID of the probe -func (smi HealthChecker) GetID() string { +func (c HealthChecker) GetID() string { return "SMI" } -func checkSMICrdsExist(clientset extensionsClientset.Interface, reqKinds map[string]string, candidateVersions []string) bool { - client := clientset.Discovery() - for _, groupVersion := range candidateVersions { - list, err := client.ServerResourcesForGroupVersion(groupVersion) +// requiredAPIResourcesExist returns true if the required API resources are available on the API server +func (c HealthChecker) requiredAPIResourcesExist() bool { + resourceKindAvailable := make(map[string]bool) + for resourceKind := range requiredResourceKindGroupMap { + resourceKindAvailable[resourceKind] = false + } + for _, groupVersion := range smiAPIGroupVersions { + list, err := c.DiscoveryClient.ServerResourcesForGroupVersion(groupVersion) if err != nil { log.Error().Err(err).Msgf("Error getting resources for groupVersion %s", groupVersion) return false } for _, resource := range list.APIResources { - crdName := resource.Kind - delete(reqKinds, crdName) + resourceKindAvailable[resource.Kind] = true } } - if len(reqKinds) != 0 { - for missingCRD := range reqKinds { - log.Error().Err(errSMICrds).Msgf("Missing SMI CRD: %s. To manually install %s, do `kubectl apply -f https://raw.githubusercontent.com/openservicemesh/osm/%s/charts/osm/crds/%s.yaml`", missingCRD, missingCRD, version.Version, reqKinds[missingCRD]) + for resourceKind, isAvailable := range resourceKindAvailable { + if !isAvailable { + log.Error().Err(errSMICrds).Msgf("SMI API for Kind=%s, GroupVersion=%s is not available", resourceKind, requiredResourceKindGroupMap[resourceKind]) + return false } - return false } + return true } diff --git a/pkg/smi/health_test.go b/pkg/smi/health_test.go index 042402e491..77d9d40f94 100644 --- a/pkg/smi/health_test.go +++ b/pkg/smi/health_test.go @@ -4,45 +4,165 @@ import ( "testing" tassert "github.com/stretchr/testify/assert" - "k8s.io/apiextensions-apiserver/pkg/client/clientset/clientset/fake" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/discovery" + + "github.com/openservicemesh/osm/pkg/k8s" ) -func TestCheckSMICrdsExist(t *testing.T) { - testCases := []struct { - testName string - reqKinds map[string]string - candidateVersion []string - expected bool - }{ +func TestRequiredAPIResourcesExist(t *testing.T) { + type testCase struct { + name string + discoveryClient discovery.ServerResourcesInterface + expect bool + } + + testCases := []testCase{ + { + name: "all SMI API resources exist", + discoveryClient: &k8s.FakeDiscoveryClient{ + Resources: map[string]metav1.APIResourceList{ + "specs.smi-spec.io/v1alpha4": {APIResources: []metav1.APIResource{ + {Kind: "HTTPRouteGroup"}, + {Kind: "TCPRoute"}, + }}, + "access.smi-spec.io/v1alpha3": {APIResources: []metav1.APIResource{ + {Kind: "TrafficTarget"}, + }}, + "split.smi-spec.io/v1alpha2": {APIResources: []metav1.APIResource{ + {Kind: "TrafficSplit"}, + }}, + }, + Err: nil, + }, + expect: true, + }, + { + name: "TrafficSplit does not exist", + discoveryClient: &k8s.FakeDiscoveryClient{ + Resources: map[string]metav1.APIResourceList{ + "specs.smi-spec.io/v1alpha4": {APIResources: []metav1.APIResource{ + {Kind: "HTTPRouteGroup"}, + {Kind: "TCPRoute"}, + }}, + "access.smi-spec.io/v1alpha3": {APIResources: []metav1.APIResource{ + {Kind: "TrafficTarget"}, + }}, + "split.smi-spec.io/v1alpha2": {APIResources: []metav1.APIResource{}}, + }, + Err: nil, + }, + expect: false, + }, { - testName: "default", - reqKinds: map[string]string{"hi": "bye"}, - candidateVersion: []string{"bye"}, - expected: true, + name: "HTTPRouteGroup does not exist", + discoveryClient: &k8s.FakeDiscoveryClient{ + Resources: map[string]metav1.APIResourceList{ + "specs.smi-spec.io/v1alpha4": {APIResources: []metav1.APIResource{ + {Kind: "TCPRoute"}, + }}, + "access.smi-spec.io/v1alpha3": {APIResources: []metav1.APIResource{ + {Kind: "TrafficTarget"}, + }}, + "split.smi-spec.io/v1alpha2": {APIResources: []metav1.APIResource{ + {Kind: "TrafficSplit"}, + }}, + }, + Err: nil, + }, + expect: false, }, { - testName: "unable to get groupVersion", - reqKinds: map[string]string{}, - candidateVersion: []string{"abracadabra"}, - expected: false, + name: "TCPRoute does not exist", + discoveryClient: &k8s.FakeDiscoveryClient{ + Resources: map[string]metav1.APIResourceList{ + "specs.smi-spec.io/v1alpha4": {APIResources: []metav1.APIResource{ + {Kind: "HTTPRouteGroup"}, + }}, + "access.smi-spec.io/v1alpha3": {APIResources: []metav1.APIResource{ + {Kind: "TrafficTarget"}, + }}, + "split.smi-spec.io/v1alpha2": {APIResources: []metav1.APIResource{ + {Kind: "TrafficSplit"}, + }}, + }, + Err: nil, + }, + expect: false, }, { - testName: "did not find all required CRD versions", - reqKinds: map[string]string{"may": "flower"}, - candidateVersion: []string{}, - expected: false, + name: "TrafficTarget does not exist", + discoveryClient: &k8s.FakeDiscoveryClient{ + Resources: map[string]metav1.APIResourceList{ + "specs.smi-spec.io/v1alpha4": {APIResources: []metav1.APIResource{ + {Kind: "HTTPRouteGroup"}, + {Kind: "TCPRoute"}, + }}, + "access.smi-spec.io/v1alpha3": {APIResources: []metav1.APIResource{}}, + "split.smi-spec.io/v1alpha2": {APIResources: []metav1.APIResource{ + {Kind: "TrafficSplit"}, + }}, + }, + Err: nil, + }, + expect: false, + }, + { + name: "specs API group does not exit", + discoveryClient: &k8s.FakeDiscoveryClient{ + Resources: map[string]metav1.APIResourceList{ + "access.smi-spec.io/v1alpha3": {APIResources: []metav1.APIResource{ + {Kind: "TrafficTarget"}, + }}, + "split.smi-spec.io/v1alpha2": {APIResources: []metav1.APIResource{ + {Kind: "TrafficSplit"}, + }}, + }, + Err: nil, + }, + expect: false, + }, + { + name: "access API group does not exist", + discoveryClient: &k8s.FakeDiscoveryClient{ + Resources: map[string]metav1.APIResourceList{ + "specs.smi-spec.io/v1alpha4": {APIResources: []metav1.APIResource{ + {Kind: "HTTPRouteGroup"}, + {Kind: "TCPRoute"}, + }}, + "split.smi-spec.io/v1alpha2": {APIResources: []metav1.APIResource{ + {Kind: "TrafficSplit"}, + }}, + }, + Err: nil, + }, + expect: false, + }, + { + name: "split API group does not exist", + discoveryClient: &k8s.FakeDiscoveryClient{ + Resources: map[string]metav1.APIResourceList{ + "specs.smi-spec.io/v1alpha4": {APIResources: []metav1.APIResource{ + {Kind: "HTTPRouteGroup"}, + {Kind: "TCPRoute"}, + }}, + "access.smi-spec.io/v1alpha3": {APIResources: []metav1.APIResource{ + {Kind: "TrafficTarget"}, + }}, + }, + Err: nil, + }, + expect: false, }, } - clientset := fake.NewSimpleClientset() - // Adds resource hi with group version bye - clientset.Resources = []*metav1.APIResourceList{{GroupVersion: "bye", APIResources: []metav1.APIResource{{Kind: "hi"}}}} for _, tc := range testCases { - t.Run(tc.testName, func(t *testing.T) { + t.Run(tc.name, func(t *testing.T) { assert := tassert.New(t) - res := checkSMICrdsExist(clientset, tc.reqKinds, tc.candidateVersion) - assert.Equal(tc.expected, res) + + c := HealthChecker{DiscoveryClient: tc.discoveryClient} + actual := c.requiredAPIResourcesExist() + assert.Equal(tc.expect, actual) }) } }