Skip to content
This repository has been archived by the owner on Jul 11, 2023. It is now read-only.

Commit

Permalink
smi/health: fix race condition leading to controller crash
Browse files Browse the repository at this point in the history
This change fixes a bug where a pkg scoped map is being
concurrently modified to check if SMI API resources are
available.
Additionally, it refactors the code for clarity and better
unit testing.

Signed-off-by: Shashank Ram <[email protected]>
  • Loading branch information
shashankram committed Jul 28, 2021
1 parent 509f30b commit 35dcb3d
Show file tree
Hide file tree
Showing 5 changed files with 211 additions and 77 deletions.
2 changes: 1 addition & 1 deletion cmd/osm-controller/osm-controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -251,7 +251,7 @@ func main() {
clientset := extensionsClientset.NewForConfigOrDie(kubeConfig)

// Health/Liveness probes
funcProbes := []health.Probes{xdsServer, smi.HealthChecker{SMIClientset: clientset}}
funcProbes := []health.Probes{xdsServer, smi.HealthChecker{DiscoveryClient: clientset.Discovery()}}
httpServer.AddHandlers(map[string]http.Handler{
"/health/ready": health.ReadinessHandler(funcProbes, getHTTPHealthProbes()),
"/health/alive": health.LivenessHandler(funcProbes, getHTTPHealthProbes()),
Expand Down
36 changes: 12 additions & 24 deletions pkg/ingress/client_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,18 +23,6 @@ import (
"github.com/openservicemesh/osm/pkg/service"
)

type fakeDiscoveryClient struct {
discovery.ServerResourcesInterface
resources map[string]metav1.APIResourceList
err error
}

// ServerResourcesForGroupVersion returns the supported resources for a group and version.
func (f *fakeDiscoveryClient) ServerResourcesForGroupVersion(groupVersion string) (*metav1.APIResourceList, error) {
resp := f.resources[groupVersion]
return &resp, f.err
}

func TestGetSupportedIngressVersions(t *testing.T) {
type testCase struct {
name string
Expand All @@ -46,8 +34,8 @@ func TestGetSupportedIngressVersions(t *testing.T) {
testCases := []testCase{
{
name: "k8s API server supports both ingress v1 and v1beta",
discoveryClient: &fakeDiscoveryClient{
resources: map[string]metav1.APIResourceList{
discoveryClient: &k8s.FakeDiscoveryClient{
Resources: map[string]metav1.APIResourceList{
"networking.k8s.io/v1": {APIResources: []metav1.APIResource{
{Kind: "Ingress"},
{Kind: "NetworkPolicy"},
Expand All @@ -56,7 +44,7 @@ func TestGetSupportedIngressVersions(t *testing.T) {
{Kind: "Ingress"},
}},
},
err: nil,
Err: nil,
},
expectedVersions: map[string]bool{
"networking.k8s.io/v1": true,
Expand All @@ -66,16 +54,16 @@ func TestGetSupportedIngressVersions(t *testing.T) {
},
{
name: "k8s API server supports only ingress v1beta1",
discoveryClient: &fakeDiscoveryClient{
resources: map[string]metav1.APIResourceList{
discoveryClient: &k8s.FakeDiscoveryClient{
Resources: map[string]metav1.APIResourceList{
"networking.k8s.io/v1": {APIResources: []metav1.APIResource{
{Kind: "NetworkPolicy"},
}},
"networking.k8s.io/v1beta1": {APIResources: []metav1.APIResource{
{Kind: "Ingress"},
}},
},
err: nil,
Err: nil,
},
expectedVersions: map[string]bool{
"networking.k8s.io/v1": false,
Expand All @@ -85,16 +73,16 @@ func TestGetSupportedIngressVersions(t *testing.T) {
},
{
name: "k8s API server supports only ingress v1",
discoveryClient: &fakeDiscoveryClient{
resources: map[string]metav1.APIResourceList{
discoveryClient: &k8s.FakeDiscoveryClient{
Resources: map[string]metav1.APIResourceList{
"networking.k8s.io/v1": {APIResources: []metav1.APIResource{
{Kind: "Ingress"},
}},
"networking.k8s.io/v1beta1": {APIResources: []metav1.APIResource{
{Kind: "NetworkPolicy"},
}},
},
err: nil,
Err: nil,
},
expectedVersions: map[string]bool{
"networking.k8s.io/v1": true,
Expand All @@ -104,9 +92,9 @@ func TestGetSupportedIngressVersions(t *testing.T) {
},
{
name: "k8s API server returns an error",
discoveryClient: &fakeDiscoveryClient{
resources: map[string]metav1.APIResourceList{},
err: errors.New("fake error"),
discoveryClient: &k8s.FakeDiscoveryClient{
Resources: map[string]metav1.APIResourceList{},
Err: errors.New("fake error"),
},
expectedVersions: nil,
exepectError: true,
Expand Down
19 changes: 19 additions & 0 deletions pkg/k8s/fake.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
package k8s

import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/discovery"
)

// FakeDiscoveryClient is a fake client for k8s API discovery
type FakeDiscoveryClient struct {
discovery.ServerResourcesInterface
Resources map[string]metav1.APIResourceList
Err error
}

// ServerResourcesForGroupVersion returns the supported resources for a group and version.
func (f *FakeDiscoveryClient) ServerResourcesForGroupVersion(groupVersion string) (*metav1.APIResourceList, error) {
resp := f.Resources[groupVersion]
return &resp, f.Err
}
59 changes: 33 additions & 26 deletions pkg/smi/health.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,62 +4,69 @@ import (
smiAccess "github.com/servicemeshinterface/smi-sdk-go/pkg/apis/access/v1alpha3"
smiSpecs "github.com/servicemeshinterface/smi-sdk-go/pkg/apis/specs/v1alpha4"
smiSplit "github.com/servicemeshinterface/smi-sdk-go/pkg/apis/split/v1alpha2"
extensionsClientset "k8s.io/apiextensions-apiserver/pkg/client/clientset/clientset"

"github.com/openservicemesh/osm/pkg/version"
"k8s.io/client-go/discovery"
)

var (

// The key is the API Resource.Kind and the value is the SMI CRD Type
reqKinds = map[string]string{
"HTTPRouteGroup": "specs", // Full CRD API Group: specs.smi-spec.io
"TCPRoute": "specs", // Full CRD API Group: specs.smi-spec.io
"TrafficSplit": "split", // Full CRD API Group: split.smi-spec.io
"TrafficTarget": "access", // Full CRD API Group: access.smi-spec.io
// requiredResourceKindGroupMap is a mapping of the required resource kind to it's API group
requiredResourceKindGroupMap = map[string]string{
"HTTPRouteGroup": smiSpecs.SchemeGroupVersion.String(),
"TCPRoute": smiSpecs.SchemeGroupVersion.String(),
"TrafficSplit": smiSplit.SchemeGroupVersion.String(),
"TrafficTarget": smiAccess.SchemeGroupVersion.String(),
}
smiAPIGroupVersions = []string{
smiSpecs.SchemeGroupVersion.String(),
smiAccess.SchemeGroupVersion.String(),
smiSpecs.SchemeGroupVersion.String(),
smiSplit.SchemeGroupVersion.String(),
}
candidateVersions = []string{smiSpecs.SchemeGroupVersion.String(), smiAccess.SchemeGroupVersion.String(), smiSpecs.SchemeGroupVersion.String(), smiSplit.SchemeGroupVersion.String()}
)

// HealthChecker has SMI clientset interface to access SMI CRDS
type HealthChecker struct {
SMIClientset extensionsClientset.Interface
DiscoveryClient discovery.ServerResourcesInterface
}

// Liveness is the Kubernetes liveness probe handler.
func (smi HealthChecker) Liveness() bool {
return checkSMICrdsExist(smi.SMIClientset, reqKinds, candidateVersions)
func (c HealthChecker) Liveness() bool {
return c.requiredAPIResourcesExist()
}

// Readiness is the Kubernetes readiness probe handler.
func (smi HealthChecker) Readiness() bool {
return checkSMICrdsExist(smi.SMIClientset, reqKinds, candidateVersions)
func (c HealthChecker) Readiness() bool {
return c.requiredAPIResourcesExist()
}

// GetID returns the ID of the probe
func (smi HealthChecker) GetID() string {
func (c HealthChecker) GetID() string {
return "SMI"
}

func checkSMICrdsExist(clientset extensionsClientset.Interface, reqKinds map[string]string, candidateVersions []string) bool {
client := clientset.Discovery()
for _, groupVersion := range candidateVersions {
list, err := client.ServerResourcesForGroupVersion(groupVersion)
// requiredAPIResourcesExist returns true if the required API resources are available on the API server
func (c HealthChecker) requiredAPIResourcesExist() bool {
resourceKindAvailable := make(map[string]bool)
for resourceKind := range requiredResourceKindGroupMap {
resourceKindAvailable[resourceKind] = false
}
for _, groupVersion := range smiAPIGroupVersions {
list, err := c.DiscoveryClient.ServerResourcesForGroupVersion(groupVersion)
if err != nil {
log.Error().Err(err).Msgf("Error getting resources for groupVersion %s", groupVersion)
return false
}
for _, resource := range list.APIResources {
crdName := resource.Kind
delete(reqKinds, crdName)
resourceKindAvailable[resource.Kind] = true
}
}

if len(reqKinds) != 0 {
for missingCRD := range reqKinds {
log.Error().Err(errSMICrds).Msgf("Missing SMI CRD: %s. To manually install %s, do `kubectl apply -f https://raw.githubusercontent.com/openservicemesh/osm/%s/charts/osm/crds/%s.yaml`", missingCRD, missingCRD, version.Version, reqKinds[missingCRD])
for resourceKind, isAvailable := range resourceKindAvailable {
if !isAvailable {
log.Error().Err(errSMICrds).Msgf("SMI API for Kind=%s, GroupVersion=%s is not available", resourceKind, requiredResourceKindGroupMap[resourceKind])
return false
}
return false
}

return true
}
Loading

0 comments on commit 35dcb3d

Please sign in to comment.