Skip to content

Commit

Permalink
Introduce a new check for extension namespace configuration (#11629)
Browse files Browse the repository at this point in the history
* Introduce a new check for extension namespace configuration

Linkerd's extension model requires that each namespace that "owns" an
extension to be labelled with the extension name. Core extensions in
particular strictly follow this pattern. For example, the namespace viz
is installed in would be labelled with `linkerd.io/extension=viz`.

The extension is used by the CLI in many different instances. It is used
in checks, it is used in uninstalls, and so on. Whenever a namespace
contains a duplicate label value (e.g. two namespaces are registered as
the owner of "viz") we introduce undefined behaviour. Extension checks
or uninstalls may or may not work correctly. These issues are not
straightforward to debug. Misconfiguration can be introduced due to a
variety of reasons.

This change adds a new "core" category (`linkerd-extension-checks`) and
a new checker that asserts all extension namespaces are configured
properly. There are two reasons why this has been made a "core"
extension:

* Extensions may have their own health checking library. It is hard to
  share a common abstraction here without duplicating the logic. For
  example, viz imports the healthchecking package whereas the
  multicluster extension has its own. A dedicated core check will work
  better with all extensions that opt-in to use linkerd's extension
  label.
* Being part of the core checks means this is going to run before any of
  the other extension checks do which might improve visibility.

The change is straightforward; if an extension value is used for the
label key more than once across the cluster, the check issues a warning
along with the namespaces the label key and value tuple exists on.

This should be followed-up with a docs change.

Closes #11509

Signed-off-by: Matei David <[email protected]>
  • Loading branch information
mateiidavid authored Nov 20, 2023
1 parent 6b212c1 commit 2cc13d7
Show file tree
Hide file tree
Showing 3 changed files with 131 additions and 1 deletion.
1 change: 1 addition & 0 deletions cli/cmd/check.go
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,7 @@ func configureAndRunChecks(cmd *cobra.Command, wout io.Writer, werr io.Writer, o
checks = append(checks, healthcheck.LinkerdOpaquePortsDefinitionChecks)
} else {
checks = append(checks, healthcheck.LinkerdControlPlaneVersionChecks)
checks = append(checks, healthcheck.LinkerdExtensionChecks)
}
checks = append(checks, healthcheck.LinkerdCNIPluginChecks)
checks = append(checks, healthcheck.LinkerdHAChecks)
Expand Down
57 changes: 56 additions & 1 deletion pkg/healthcheck/healthcheck.go
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,10 @@ const (
// corresponding pods
LinkerdOpaquePortsDefinitionChecks CategoryID = "linkerd-opaque-ports-definition"

// LinkerdExtensionChecks adds checks to validate configuration for all
// extensions discovered in the cluster at runtime
LinkerdExtensionChecks CategoryID = "linkerd-extension-checks"

// LinkerdCNIResourceLabel is the label key that is used to identify
// whether a Kubernetes resource is related to the install-cni command
// The value is expected to be "true", "false" or "", where "false" and
Expand Down Expand Up @@ -1414,6 +1418,20 @@ func (hc *HealthChecker) allCategories() []*Category {
},
false,
),
NewCategory(
LinkerdExtensionChecks,
[]Checker{
{
description: "namespace configuration for extensions",
warning: true,
hintAnchor: "l5d-extension-namespaces",
check: func(ctx context.Context) error {
return hc.checkExtensionNsLabels(ctx)
},
},
},
false,
),
}
}

Expand Down Expand Up @@ -2673,7 +2691,7 @@ func (hc *HealthChecker) checkExtensionAPIServerAuthentication(ctx context.Conte
func (hc *HealthChecker) checkClockSkew(ctx context.Context) error {
if hc.kubeAPI == nil {
// we should never get here
return fmt.Errorf("unexpected error: Kubernetes ClientSet not initialized")
return errors.New("unexpected error: Kubernetes ClientSet not initialized")
}

var clockSkewNodes []string
Expand Down Expand Up @@ -2702,6 +2720,43 @@ func (hc *HealthChecker) checkClockSkew(ctx context.Context) error {
return nil
}

func (hc *HealthChecker) checkExtensionNsLabels(ctx context.Context) error {
if hc.kubeAPI == nil {
// oops something wrong happened
return errors.New("unexpected error: Kubernetes ClientSet not initialized")
}

namespaces, err := hc.kubeAPI.GetAllNamespacesWithExtensionLabel(ctx)
if err != nil {
return fmt.Errorf("unexpected error when retrieving namespaces: %w", err)
}

freq := make(map[string][]string)
for _, ns := range namespaces {
// We can guarantee the namespace has the extension label since we used
// a label selector when retrieving namespaces
ext := ns.Labels[k8s.LinkerdExtensionLabel]
// To make it easier to print, store already error-formatted namespace
// in freq table
freq[ext] = append(freq[ext], fmt.Sprintf("\t\t* %s", ns.Name))
}

errs := []string{}
for ext, namespaces := range freq {
if len(namespaces) == 1 {
continue
}
errs = append(errs, fmt.Sprintf("\t* label \"%s=%s\" is present on more than one namespace:\n%s", k8s.LinkerdExtensionLabel, ext, strings.Join(namespaces, "\n")))
}

if len(errs) > 0 {
return errors.New(strings.Join(
append([]string{"some extensions have invalid configuration"}, errs...), "\n"))
}

return nil
}

// CheckRoles checks that the expected roles exist.
func CheckRoles(ctx context.Context, kubeAPI *k8s.KubernetesAPI, shouldExist bool, namespace string, expectedNames []string, labelSelector string) error {
options := metav1.ListOptions{
Expand Down
74 changes: 74 additions & 0 deletions pkg/healthcheck/healthcheck_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -473,6 +473,80 @@ status:

}

func TestNamespaceExtCfg(t *testing.T) {
namespaces := map[string]string{
"vizOne": `
apiVersion: v1
kind: Namespace
metadata:
name: viz-1
labels:
linkerd.io/extension: viz
`,
"mcOne": `
apiVersion: v1
kind: Namespace
metadata:
name: mc-1
labels:
linkerd.io/extension: multicluster
`,
"mcTwo": `
apiVersion: v1
kind: Namespace
metadata:
name: mc-2
labels:
linkerd.io/extension: multicluster
`}

testCases := []struct {
description string
k8sConfigs []string
results []string
}{
{
description: "successfully passes checks",
k8sConfigs: []string{namespaces["vizOne"], namespaces["mcOne"]},
results: []string{
"linkerd-extension-checks namespace configuration for extensions",
},
},
{
description: "fails invalid configuration",
k8sConfigs: []string{namespaces["vizOne"], namespaces["mcOne"], namespaces["mcTwo"]},
results: []string{
"linkerd-extension-checks namespace configuration for extensions: some extensions have invalid configuration\n\t* label \"linkerd.io/extension=multicluster\" is present on more than one namespace:\n\t\t* mc-1\n\t\t* mc-2",
},
},
}

for _, tc := range testCases {
// pin tc
tc := tc
t.Run(tc.description, func(t *testing.T) {
hc := NewHealthChecker(
[]CategoryID{LinkerdExtensionChecks},
&Options{
ControlPlaneNamespace: "test-ns",
},
)

var err error
hc.kubeAPI, err = k8s.NewFakeAPI(tc.k8sConfigs...)
if err != nil {
t.Fatalf("Unexpected error: %s", err)
}

obs := newObserver()
hc.RunChecks(obs.resultFn)
if diff := deep.Equal(obs.results, tc.results); diff != nil {
t.Fatalf("%+v", diff)
}
})
}
}

func TestConfigExists(t *testing.T) {

namespace := []string{`
Expand Down

0 comments on commit 2cc13d7

Please sign in to comment.