From 04742b96bf757413c88d0f15bee91679644f0337 Mon Sep 17 00:00:00 2001
From: Andrey Smirnov
Date: Tue, 5 Oct 2021 16:38:13 +0300
Subject: [PATCH] feat: import fixes/updates from kubeadm bootstrap provider

This adds new conditions and some small features, and cleans up the
failure handling. Some changes:

* tests no longer set up owner refs; they should be set by core CAPI controllers
* set `BootstrapRef` correctly
* the TalosConfig controller no longer fails when a requeue is needed; requeueing is handled by watches
* more tests for conditions

With conditions enabled, it should no longer be necessary to look into the
controller logs to see why reconciliation failed.

Signed-off-by: Andrey Smirnov
---
 .drone.yml                                    |   9 +-
 Dockerfile                                    |   7 +-
 Makefile                                      |   8 +-
 README.md                                     |  15 +-
 api/v1alpha3/conditions.go                    |  32 ++++
 api/v1alpha3/talosconfig_types.go             |  19 ++
 api/v1alpha3/zz_generated.deepcopy.go         |   8 +
 ...otstrap.cluster.x-k8s.io_talosconfigs.yaml |  49 ++++++
 controllers/talosconfig_controller.go         | 114 +++++++++---
 internal/integration/README.md                |  11 ++
 internal/integration/constants_test.go        |   6 +
 internal/integration/helpers_test.go          |  25 +--
 internal/integration/integration_test.go      | 166 +++++++++++++++---
 internal/integration/setup_test.go            |   6 +-
 main.go                                       |  13 +-
 15 files changed, 401 insertions(+), 87 deletions(-)
 create mode 100644 api/v1alpha3/conditions.go
 create mode 100644 internal/integration/README.md

diff --git a/.drone.yml b/.drone.yml
index 62dda88..78f5baf 100644
--- a/.drone.yml
+++ b/.drone.yml
@@ -5,7 +5,7 @@ name: default
 services:
   - name: docker
-    image: docker:20.10-dind
+    image: ghcr.io/smira/docker:20.10-dind-hacked
     entrypoint: [dockerd]
     privileged: true
     volumes:
@@ -53,6 +53,7 @@ steps:
       INTEGRATION_SKIP_CLEANUP: 1 # make things a bit faster
     commands:
       - make env-up
+      - make release-manifests
       - make test
     when:
       event:
@@ -74,6 +75,10 @@ steps:
     environment:
       CODECOV_TOKEN:
         from_secret: CODECOV_TOKEN
+    when:
+      event:
+        include:
+          - pull_request
 volumes:
   - name: docker-socket
     path: /var/run
@@ -176,6 +181,6 @@ depends_on:
 ---
 kind: signature
-hmac: a7d3d09b2ec221337f14cde8f5f2a872a4c3500df09b7f27e6a169a4eb35944b
+hmac: 405eea502f51dfc9368f81971ca97c96ac99f5fff6ddc1afa613894e80dd67c2
 ...
diff --git a/Dockerfile b/Dockerfile
index 8c99e35..1fc041b 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -55,7 +55,10 @@ COPY --from=generate-build /src/api /api
 FROM build AS integration-test-build
 ENV CGO_ENABLED 1
 ARG TALOS_VERSION
-ARG GO_LDFLAGS="-linkmode=external -extldflags '-static' -X github.com/talos-systems/cluster-api-bootstrap-provider-talos/internal/integration.TalosVersion=${TALOS_VERSION}"
+ARG TAG
+ARG ARTIFACTS
+ARG PKG=github.com/talos-systems/cluster-api-bootstrap-provider-talos/internal/integration
+ARG GO_LDFLAGS="-linkmode=external -extldflags '-static' -X ${PKG}.TalosVersion=${TALOS_VERSION} -X ${PKG}.Artifacts=${ARTIFACTS} -X ${PKG}.Tag=${TAG}"
 RUN --mount=type=cache,target=/.cache go test -race -ldflags "${GO_LDFLAGS}" -coverpkg=./... -v -c ./internal/integration

 FROM scratch AS integration-test
@@ -74,7 +77,7 @@ RUN cd config/manager \
     && kustomize build config/default > /bootstrap-components.yaml \
     && cp config/metadata/metadata.yaml /metadata.yaml
 
-FROM scratch AS release
+FROM scratch AS release-manifests
 ARG TAG
 COPY --from=release-build /bootstrap-components.yaml /bootstrap-talos/${TAG}/bootstrap-components.yaml
 COPY --from=release-build /metadata.yaml /bootstrap-talos/${TAG}/metadata.yaml
diff --git a/Makefile b/Makefile
index 963cc55..c0dbb86 100644
--- a/Makefile
+++ b/Makefile
@@ -31,6 +31,7 @@ COMMON_ARGS += --build-arg=TOOLS=$(TOOLS)
 COMMON_ARGS += --build-arg=CONTROLLER_GEN_VERSION=$(CONTROLLER_GEN_VERSION)
 COMMON_ARGS += --build-arg=CONVERSION_GEN_VERSION=$(CONVERSION_GEN_VERSION)
 COMMON_ARGS += --build-arg=TALOS_VERSION=$(TALOS_VERSION)
+COMMON_ARGS += --build-arg=ARTIFACTS=$(ARTIFACTS)
 
 all: manifests container
 
@@ -91,10 +92,13 @@ release-notes: ## Create the release notes.
 	@mkdir -p $(ARTIFACTS)
 	ARTIFACTS=$(ARTIFACTS) ./hack/release.sh $@ $(ARTIFACTS)/RELEASE_NOTES.md $(TAG)
 
-.PHONY: release
-release: manifests container release-notes ## Create the release YAML. The build result will be ouput to the specified local destination.
+.PHONY: release-manifests
+release-manifests:
 	@$(MAKE) local-$@ DEST=./$(ARTIFACTS) PLATFORM=linux/amd64
 
+.PHONY: release
+release: manifests container release-notes release-manifests ## Create the release YAML. The build result will be output to the specified local destination.
+
 .PHONY: deploy
 deploy: manifests ## Deploy to a cluster. This is for testing purposes only.
 	kubectl apply -k config/default
diff --git a/README.md b/README.md
index 34c5fd1..fec8bfe 100644
--- a/README.md
+++ b/README.md
@@ -2,7 +2,7 @@
 
 ## Intro
 
-The Cluster API Bootstrap Provider Talos (CABPT) is a project by [Talos Systems](https://www.talos-systems.com/) that provides a [Cluster API](https://github.com/kubernetes-sigs/cluster-api)(CAPI) bootstrap provider for use in deploying Talos-based Kubernetes nodes across any environment.
+The Cluster API Bootstrap Provider Talos (CABPT) is a project by [Sidero Labs](https://www.siderolabs.com/) that provides a [Cluster API](https://github.com/kubernetes-sigs/cluster-api)(CAPI) bootstrap provider for use in deploying Talos-based Kubernetes nodes across any environment.
 Given some basic info, this provider will generate bootstrap configurations for a given machine and reconcile the necessary custom resources for CAPI to pick up the generated data.
 
 ## Corequisites
 
@@ -10,7 +10,6 @@ Given some basic info, this provider will generate bootstrap configurations for
 There are a few corequisites and assumptions that go into using this project:
 
 - [Cluster API](https://github.com/kubernetes-sigs/cluster-api)
-- [Cluster API Provider Metal](https://github.com/talos-systems/cluster-api-provider-metal) (optional)
 
 ## Building and Installing
 
@@ -24,14 +23,14 @@ You will need at least the upstream CAPI components and an infrastructure provid
 
 ## Usage
 
-CAPM supports a single API type, a TalosConfig.
+CABPT supports a single API type, a TalosConfig.
 You can create YAML definitions of a TalosConfig and `kubectl apply` them as part of a larger CAPI cluster deployment.
 Below is a bare-minimum example.
 A basic config:
 
 ```yaml
-apiVersion: bootstrap.cluster.x-k8s.io/v1alpha2
+apiVersion: bootstrap.cluster.x-k8s.io/v1alpha3
 kind: TalosConfig
 metadata:
   name: talos-0
@@ -48,13 +47,13 @@ When creating a TalosConfig this way, you can then retrieve the talosconfig file
 
 If you wish to do something more complex, we allow for the ability to supply an entire Talos config file to the resource.
 This can be done by setting the generateType to `none` and specifying a `data` field.
-This config file can be generated with `osctl config generate` and the edited to supply the various options you may desire.
+This config file can be generated with `talosctl config generate` and then edited to supply the various options you may desire.
 This full config is blindly copied from the `data` section of the spec and presented under `.status.bootstrapData` so that the upstream CAPI controllers can see it and make use.
 
 An example of a more complex config:
 
 ```yaml
-apiVersion: bootstrap.cluster.x-k8s.io/v1alpha2
+apiVersion: bootstrap.cluster.x-k8s.io/v1alpha3
 kind: TalosConfig
 metadata:
   name: talos-0
@@ -72,5 +71,5 @@ spec:
 ...
 ```
 
-Note that specifying the full config above removes the ability for our bootstrap provider to generate a talosconfig for use.
-As such, you should keep track of the talosconfig that's generated when running `osctl config generate`.
+Note that specifying the full config above removes the ability for our bootstrap provider to generate a machine configuration for use.
+As such, you should keep track of the machine configuration that's generated when running `talosctl config generate`.
diff --git a/api/v1alpha3/conditions.go b/api/v1alpha3/conditions.go
new file mode 100644
index 0000000..393e32d
--- /dev/null
+++ b/api/v1alpha3/conditions.go
@@ -0,0 +1,32 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+package v1alpha3
+
+import (
+	capiv1 "sigs.k8s.io/cluster-api/api/v1alpha4"
+)
+
+// Conditions and condition Reasons for the TalosConfig object
+
+const (
+	// DataSecretAvailableCondition documents the status of the bootstrap secret generation process.
+	//
+	// NOTE: When DataSecret generation starts, the process completes immediately within the
+	// same reconciliation, so the user will always see a transition from Wait to Generated without any
+	// evidence that BootstrapSecret generation is started/in progress.
+	DataSecretAvailableCondition capiv1.ConditionType = "DataSecretAvailable"
+
+	// WaitingForClusterInfrastructureReason (Severity=Info) documents a bootstrap secret generation process
+	// waiting for the cluster infrastructure to be ready.
+	//
+	// NOTE: Having the cluster infrastructure ready is a pre-condition for starting to create machines;
+	// the TalosConfig controller ensures this pre-condition is satisfied.
+	WaitingForClusterInfrastructureReason = "WaitingForClusterInfrastructure"
+
+	// DataSecretGenerationFailedReason (Severity=Warning) documents a TalosConfig controller detecting
+	// an error while generating a data secret; those kinds of errors are usually due to misconfigurations
+	// and user intervention is required to get them fixed.
+ DataSecretGenerationFailedReason = "DataSecretGenerationFailed" +) diff --git a/api/v1alpha3/talosconfig_types.go b/api/v1alpha3/talosconfig_types.go index b68e7f5..a392716 100644 --- a/api/v1alpha3/talosconfig_types.go +++ b/api/v1alpha3/talosconfig_types.go @@ -6,6 +6,7 @@ package v1alpha3 import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + capiv1 "sigs.k8s.io/cluster-api/api/v1alpha4" ) const ( @@ -42,6 +43,14 @@ type TalosConfigStatus struct { // FailureMessage will be set on non-retryable errors // +optional FailureMessage string `json:"failureMessage,omitempty"` + + // ObservedGeneration is the latest generation observed by the controller. + // +optional + ObservedGeneration int64 `json:"observedGeneration,omitempty"` + + // Conditions defines current service state of the TalosConfig. + // +optional + Conditions capiv1.Conditions `json:"conditions,omitempty"` } // +kubebuilder:object:root=true @@ -58,6 +67,16 @@ type TalosConfig struct { Status TalosConfigStatus `json:"status,omitempty"` } +// GetConditions returns the set of conditions for this object. +func (c *TalosConfig) GetConditions() capiv1.Conditions { + return c.Status.Conditions +} + +// SetConditions sets the conditions on this object. +func (c *TalosConfig) SetConditions(conditions capiv1.Conditions) { + c.Status.Conditions = conditions +} + // +kubebuilder:object:root=true // TalosConfigList contains a list of TalosConfig diff --git a/api/v1alpha3/zz_generated.deepcopy.go b/api/v1alpha3/zz_generated.deepcopy.go index 7e7586c..4593ee7 100644 --- a/api/v1alpha3/zz_generated.deepcopy.go +++ b/api/v1alpha3/zz_generated.deepcopy.go @@ -11,6 +11,7 @@ package v1alpha3 import ( runtime "k8s.io/apimachinery/pkg/runtime" + "sigs.k8s.io/cluster-api/api/v1alpha4" ) // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. @@ -118,6 +119,13 @@ func (in *TalosConfigStatus) DeepCopyInto(out *TalosConfigStatus) { *out = new(string) **out = **in } + if in.Conditions != nil { + in, out := &in.Conditions, &out.Conditions + *out = make(v1alpha4.Conditions, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new TalosConfigStatus. diff --git a/config/crd/bases/bootstrap.cluster.x-k8s.io_talosconfigs.yaml b/config/crd/bases/bootstrap.cluster.x-k8s.io_talosconfigs.yaml index 7325e60..6864c92 100644 --- a/config/crd/bases/bootstrap.cluster.x-k8s.io_talosconfigs.yaml +++ b/config/crd/bases/bootstrap.cluster.x-k8s.io_talosconfigs.yaml @@ -118,6 +118,50 @@ spec: status: description: TalosConfigStatus defines the observed state of TalosConfig properties: + conditions: + description: Conditions defines current service state of the TalosConfig. + items: + description: Condition defines an observation of a Cluster API resource + operational state. + properties: + lastTransitionTime: + description: Last time the condition transitioned from one status + to another. This should be when the underlying condition changed. + If that is not known, then using the time when the API field + changed is acceptable. + format: date-time + type: string + message: + description: A human readable message indicating details about + the transition. This field may be empty. + type: string + reason: + description: The reason for the condition's last transition + in CamelCase. The specific API may choose whether or not this + field is considered a guaranteed API. This field may not be + empty. 
+ type: string + severity: + description: Severity provides an explicit classification of + Reason code, so the users or machines can immediately understand + the current situation and act accordingly. The Severity field + MUST be set only when Status=False. + type: string + status: + description: Status of the condition, one of True, False, Unknown. + type: string + type: + description: Type of condition in CamelCase or in foo.example.com/CamelCase. + Many .condition.type values are consistent across resources + like Available, but because arbitrary conditions can be useful + (see .node.status.conditions), the ability to deconflict is + important. + type: string + required: + - status + - type + type: object + type: array dataSecretName: description: DataSecretName is the name of the secret that stores the bootstrap data script. @@ -128,6 +172,11 @@ spec: failureReason: description: FailureReason will be set on non-retryable errors type: string + observedGeneration: + description: ObservedGeneration is the latest generation observed + by the controller. + format: int64 + type: integer ready: description: Ready indicates the BootstrapData field is ready to be consumed diff --git a/controllers/talosconfig_controller.go b/controllers/talosconfig_controller.go index 081d813..e9f360e 100644 --- a/controllers/talosconfig_controller.go +++ b/controllers/talosconfig_controller.go @@ -29,6 +29,7 @@ import ( expv1 "sigs.k8s.io/cluster-api/exp/api/v1alpha4" "sigs.k8s.io/cluster-api/feature" "sigs.k8s.io/cluster-api/util" + "sigs.k8s.io/cluster-api/util/conditions" "sigs.k8s.io/cluster-api/util/patch" "sigs.k8s.io/cluster-api/util/predicates" ctrl "sigs.k8s.io/controller-runtime" @@ -53,8 +54,9 @@ var ( // TalosConfigReconciler reconciles a TalosConfig object type TalosConfigReconciler struct { client.Client - Log logr.Logger - Scheme *runtime.Scheme + Log logr.Logger + Scheme *runtime.Scheme + WatchFilterValue string } type TalosConfigScope struct { @@ -86,6 +88,7 @@ func (r *TalosConfigReconciler) SetupWithManager(ctx context.Context, mgr ctrl.M b := ctrl.NewControllerManagedBy(mgr). For(&bootstrapv1alpha3.TalosConfig{}). WithOptions(options). + WithEventFilter(predicates.ResourceNotPausedAndHasFilterLabel(ctrl.LoggerFrom(ctx), r.WatchFilterValue)). Watches( &source.Kind{Type: &capiv1.Machine{}}, handler.EnqueueRequestsFromMapFunc(r.MachineToBootstrapMapFunc), @@ -95,7 +98,8 @@ func (r *TalosConfigReconciler) SetupWithManager(ctx context.Context, mgr ctrl.M b = b.Watches( &source.Kind{Type: &expv1.MachinePool{}}, handler.EnqueueRequestsFromMapFunc(r.MachinePoolToBootstrapMapFunc), - ) + ).WithEventFilter(predicates.ResourceNotPausedAndHasFilterLabel(ctrl.LoggerFrom(ctx), r.WatchFilterValue)) + } c, err := b.Build(r) @@ -106,7 +110,10 @@ func (r *TalosConfigReconciler) SetupWithManager(ctx context.Context, mgr ctrl.M err = c.Watch( &source.Kind{Type: &capiv1.Cluster{}}, handler.EnqueueRequestsFromMapFunc(r.ClusterToTalosConfigs), - predicates.ClusterUnpausedAndInfrastructureReady(r.Log), + predicates.All(ctrl.LoggerFrom(ctx), + predicates.ClusterUnpausedAndInfrastructureReady(ctrl.LoggerFrom(ctx)), + predicates.ResourceHasFilterLabel(ctrl.LoggerFrom(ctx), r.WatchFilterValue), + ), ) if err != nil { return err @@ -143,7 +150,25 @@ func (r *TalosConfigReconciler) Reconcile(ctx context.Context, req ctrl.Request) } // Always attempt to Patch the TalosConfig object and status after each reconciliation. 
defer func() { - if err := patchHelper.Patch(ctx, config); err != nil { + // always update the readyCondition; the summary is represented using the "1 of x completed" notation. + conditions.SetSummary(config, + conditions.WithConditions( + bootstrapv1alpha3.DataSecretAvailableCondition, + ), + ) + // Patch ObservedGeneration only if the reconciliation completed successfully + patchOpts := []patch.Option{ + patch.WithOwnedConditions{ + Conditions: []capiv1.ConditionType{ + bootstrapv1alpha3.DataSecretAvailableCondition, + }, + }, + } + if rerr == nil { + patchOpts = append(patchOpts, patch.WithStatusObservedGeneration{}) + } + + if err := patchHelper.Patch(ctx, config, patchOpts...); err != nil { log.Error(err, "failed to patch config") if rerr == nil { rerr = err @@ -151,10 +176,8 @@ func (r *TalosConfigReconciler) Reconcile(ctx context.Context, req ctrl.Request) } }() - // If the talosConfig doesn't have our finalizer, add it. - controllerutil.AddFinalizer(config, bootstrapv1alpha3.ConfigFinalizer) - - // Handle deleted machines + // Handle deleted talosconfigs + // We no longer set finalizers on talosconfigs, but we have to remove previously set finalizers if !config.ObjectMeta.DeletionTimestamp.IsZero() { return r.reconcileDelete(ctx, config) } @@ -165,29 +188,45 @@ func (r *TalosConfigReconciler) Reconcile(ctx context.Context, req ctrl.Request) log.Error(err, "could not get owner resource") return ctrl.Result{}, err } + if owner == nil { log.Info("Waiting for OwnerRef on the talosconfig") - return ctrl.Result{}, errors.New("no owner ref") + return ctrl.Result{}, nil } + log = log.WithName(fmt.Sprintf("owner-name=%s", owner.GetName())) // Lookup the cluster the machine is associated with cluster, err := util.GetClusterByName(ctx, r.Client, owner.GetNamespace(), owner.ClusterName()) if err != nil { + if errors.Is(err, util.ErrNoCluster) { + log.Info(fmt.Sprintf("%s does not belong to a cluster yet, waiting until it's part of a cluster", owner.GetKind())) + return ctrl.Result{}, nil + } + + if apierrors.IsNotFound(err) { + log.Info("Cluster does not exist yet, waiting until it is created") + return ctrl.Result{}, nil + } + log.Error(err, "could not get cluster by machine metadata") + return ctrl.Result{}, err } // bail super early if it's already ready if config.Status.Ready { log.Info("ignoring an already ready config") + conditions.MarkTrue(config, bootstrapv1alpha3.DataSecretAvailableCondition) return ctrl.Result{}, nil } // Wait patiently for the infrastructure to be ready if !cluster.Status.InfrastructureReady { log.Info("Infrastructure is not ready, waiting until ready.") - return ctrl.Result{}, errors.New("infra not ready") + conditions.MarkFalse(config, bootstrapv1alpha3.DataSecretAvailableCondition, bootstrapv1alpha3.WaitingForClusterInfrastructureReason, capiv1.ConditionSeverityInfo, "") + + return ctrl.Result{}, nil } // Reconcile status for machines that already have a secret reference, but our status isn't up to date. 
@@ -195,6 +234,7 @@ func (r *TalosConfigReconciler) Reconcile(ctx context.Context, req ctrl.Request) if owner.DataSecretName() != nil && (!config.Status.Ready || config.Status.DataSecretName == nil) { config.Status.Ready = true config.Status.DataSecretName = owner.DataSecretName() + conditions.MarkTrue(config, bootstrapv1alpha3.DataSecretAvailableCondition) return ctrl.Result{}, nil } @@ -205,7 +245,25 @@ func (r *TalosConfigReconciler) Reconcile(ctx context.Context, req ctrl.Request) Cluster: cluster, } - var retData *TalosConfigBundle + if err = r.reconcileGenerate(ctx, tcScope); err != nil { + conditions.MarkFalse(config, bootstrapv1alpha3.DataSecretAvailableCondition, bootstrapv1alpha3.DataSecretGenerationFailedReason, capiv1.ConditionSeverityError, err.Error()) + + return ctrl.Result{}, err + } + + config.Status.Ready = true + conditions.MarkTrue(config, bootstrapv1alpha3.DataSecretAvailableCondition) + + return ctrl.Result{}, nil +} + +func (r *TalosConfigReconciler) reconcileGenerate(ctx context.Context, tcScope *TalosConfigScope) error { + var ( + retData *TalosConfigBundle + err error + ) + + config := tcScope.Config machineType, _ := machine.ParseType(config.Spec.GenerateType) //nolint:errcheck // handle errors later @@ -213,22 +271,23 @@ func (r *TalosConfigReconciler) Reconcile(ctx context.Context, req ctrl.Request) // Slurp and use user-supplied configs case config.Spec.GenerateType == "none": if config.Spec.Data == "" { - return ctrl.Result{}, errors.New("failed to specify config data with none generate type") + return errors.New("failed to specify config data with none generate type") } + retData, err = r.userConfigs(ctx, tcScope) if err != nil { - return ctrl.Result{}, err + return err } // Generate configs on the fly case machineType != machine.TypeUnknown: retData, err = r.genConfigs(ctx, tcScope) if err != nil { - return ctrl.Result{}, err + return err } default: - return ctrl.Result{}, fmt.Errorf("unknown generate type specified: %q", config.Spec.GenerateType) + return fmt.Errorf("unknown generate type specified: %q", config.Spec.GenerateType) } // Handle patches to the machine config if they were specified @@ -236,17 +295,17 @@ func (r *TalosConfigReconciler) Reconcile(ctx context.Context, req ctrl.Request) if len(config.Spec.ConfigPatches) > 0 { marshalledPatches, err := json.Marshal(config.Spec.ConfigPatches) if err != nil { - return ctrl.Result{}, fmt.Errorf("failure marshalling config patches: %s", err) + return fmt.Errorf("failure marshalling config patches: %s", err) } patch, err := jsonpatch.DecodePatch(marshalledPatches) if err != nil { - return ctrl.Result{}, fmt.Errorf("failure decoding config patches from talosconfig to rfc6902 patch: %s", err) + return fmt.Errorf("failure decoding config patches from talosconfig to rfc6902 patch: %s", err) } patchedBytes, err := configpatcher.JSON6902([]byte(retData.BootstrapData), patch) if err != nil { - return ctrl.Result{}, err + return err } retData.BootstrapData = string(patchedBytes) @@ -254,19 +313,21 @@ func (r *TalosConfigReconciler) Reconcile(ctx context.Context, req ctrl.Request) // Packet acts a fool if you don't prepend #!talos to the userdata // so we try to suss out if that's the type of machine/machinePool getting created. 
-	if owner.IsMachinePool() {
+	if tcScope.ConfigOwner.IsMachinePool() {
 		mp := &expv1.MachinePool{}
-		if err := runtime.DefaultUnstructuredConverter.FromUnstructured(owner.Object, mp); err != nil {
-			return ctrl.Result{}, err
+		if err := runtime.DefaultUnstructuredConverter.FromUnstructured(tcScope.ConfigOwner.Object, mp); err != nil {
+			return err
 		}
+
 		if mp.Spec.Template.Spec.InfrastructureRef.Kind == "PacketMachinePool" {
 			retData.BootstrapData = "#!talos\n" + retData.BootstrapData
 		}
 	} else {
 		machine := &capiv1.Machine{}
-		if err := runtime.DefaultUnstructuredConverter.FromUnstructured(owner.Object, machine); err != nil {
-			return ctrl.Result{}, err
+		if err := runtime.DefaultUnstructuredConverter.FromUnstructured(tcScope.ConfigOwner.Object, machine); err != nil {
+			return err
 		}
+
 		if machine.Spec.InfrastructureRef.Name == "PacketMachine" {
 			retData.BootstrapData = "#!talos\n" + retData.BootstrapData
 		}
@@ -276,14 +337,13 @@ func (r *TalosConfigReconciler) Reconcile(ctx context.Context, req ctrl.Request)
 	dataSecretName, err = r.writeBootstrapData(ctx, tcScope, []byte(retData.BootstrapData))
 	if err != nil {
-		return ctrl.Result{}, err
+		return err
 	}
 	config.Status.DataSecretName = &dataSecretName
 	config.Status.TalosConfig = retData.TalosConfig
-	config.Status.Ready = true
-	return ctrl.Result{}, nil
+	return nil
 }
 
 func (r *TalosConfigReconciler) reconcileDelete(ctx context.Context, config *bootstrapv1alpha3.TalosConfig) (ctrl.Result, error) {
diff --git a/internal/integration/README.md b/internal/integration/README.md
new file mode 100644
index 0000000..f1caaa2
--- /dev/null
+++ b/internal/integration/README.md
@@ -0,0 +1,11 @@
+# Running integration tests
+
+Bring up a Talos cluster (or any other Kubernetes cluster).
+
+Put `kubeconfig` in the root of the repository (for Talos: `talosctl -n <node-ip> kubeconfig -f kubeconfig`).
+
+Create a release with a fixed tag: `make release-manifests TAG=v0.4.0` (re-run this if the CRDs are updated).
+
+Run the tests: `make test TAG=v0.4.0`.
+
+The tests clean up after themselves, so they can be run repeatedly against the same cluster.
diff --git a/internal/integration/constants_test.go b/internal/integration/constants_test.go
index b3b17cd..6db4198 100644
--- a/internal/integration/constants_test.go
+++ b/internal/integration/constants_test.go
@@ -6,3 +6,9 @@ package integration
 
 // TalosVersion is set by the build process.
 var TalosVersion string
+
+// Artifacts is set by the build process.
+var Artifacts string
+
+// Tag is set by the build process.
+var Tag string
diff --git a/internal/integration/helpers_test.go b/internal/integration/helpers_test.go
index 277ee2e..fba28cf 100644
--- a/internal/integration/helpers_test.go
+++ b/internal/integration/helpers_test.go
@@ -22,11 +22,9 @@ import (
 	corev1 "k8s.io/api/core/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/types"
-	"k8s.io/client-go/kubernetes/scheme"
 	capiv1 "sigs.k8s.io/cluster-api/api/v1alpha4"
 	"sigs.k8s.io/cluster-api/util/patch"
 	"sigs.k8s.io/controller-runtime/pkg/client"
-	"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
 
 	bootstrapv1alpha3 "github.com/talos-systems/cluster-api-bootstrap-provider-talos/api/v1alpha3"
 	// +kubebuilder:scaffold:imports
@@ -61,7 +59,7 @@ func generateName(t *testing.T, kind string) string {
 }
 
 // createCluster creates a Cluster with "ready" infrastructure.
-func createCluster(ctx context.Context, t *testing.T, c client.Client, namespaceName string, spec *capiv1.ClusterSpec) *capiv1.Cluster { +func createCluster(ctx context.Context, t *testing.T, c client.Client, namespaceName string, spec *capiv1.ClusterSpec, infrastructureReady bool) *capiv1.Cluster { t.Helper() clusterName := generateName(t, "cluster") @@ -86,17 +84,19 @@ func createCluster(ctx context.Context, t *testing.T, c client.Client, namespace require.NoError(t, c.Create(ctx, cluster), "can't create a cluster") - patchHelper, err := patch.NewHelper(cluster, c) - require.NoError(t, err) + if infrastructureReady { + patchHelper, err := patch.NewHelper(cluster, c) + require.NoError(t, err) - cluster.Status.InfrastructureReady = true - require.NoError(t, patchHelper.Patch(ctx, cluster)) + cluster.Status.InfrastructureReady = true + require.NoError(t, patchHelper.Patch(ctx, cluster)) + } return cluster } // createMachine creates a Machine owned by the Cluster. -func createMachine(ctx context.Context, t *testing.T, c client.Client, cluster *capiv1.Cluster) *capiv1.Machine { +func createMachine(ctx context.Context, t *testing.T, c client.Client, cluster *capiv1.Cluster, talosconfig *bootstrapv1alpha3.TalosConfig) *capiv1.Machine { t.Helper() machineName := generateName(t, "machine") @@ -111,6 +111,9 @@ func createMachine(ctx context.Context, t *testing.T, c client.Client, cluster * ConfigRef: &corev1.ObjectReference{ Kind: "TalosConfig", APIVersion: bootstrapv1alpha3.GroupVersion.String(), + Name: talosconfig.GetName(), + Namespace: talosconfig.GetNamespace(), + UID: talosconfig.GetUID(), }, }, }, @@ -122,20 +125,18 @@ func createMachine(ctx context.Context, t *testing.T, c client.Client, cluster * } // createTalosConfig creates a TalosConfig owned by the Machine. 
-func createTalosConfig(ctx context.Context, t *testing.T, c client.Client, machine *capiv1.Machine, spec bootstrapv1alpha3.TalosConfigSpec) *bootstrapv1alpha3.TalosConfig { +func createTalosConfig(ctx context.Context, t *testing.T, c client.Client, namespaceName string, spec bootstrapv1alpha3.TalosConfigSpec) *bootstrapv1alpha3.TalosConfig { t.Helper() talosConfigName := generateName(t, "talosconfig") talosConfig := &bootstrapv1alpha3.TalosConfig{ ObjectMeta: metav1.ObjectMeta{ - Namespace: machine.Namespace, + Namespace: namespaceName, Name: talosConfigName, }, Spec: spec, } - require.NoError(t, controllerutil.SetOwnerReference(machine, talosConfig, scheme.Scheme)) - require.NoError(t, c.Create(ctx, talosConfig)) // TODO that should not be needed diff --git a/internal/integration/integration_test.go b/internal/integration/integration_test.go index a9ac8d2..907f2b6 100644 --- a/internal/integration/integration_test.go +++ b/internal/integration/integration_test.go @@ -7,6 +7,7 @@ package integration import ( "encoding/json" "testing" + "time" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -16,7 +17,10 @@ import ( corev1 "k8s.io/api/core/v1" apiextensions "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" capiv1 "sigs.k8s.io/cluster-api/api/v1alpha4" + "sigs.k8s.io/cluster-api/util/conditions" + "sigs.k8s.io/cluster-api/util/patch" ) func TestIntegration(t *testing.T) { @@ -28,11 +32,11 @@ func TestIntegration(t *testing.T) { t.Parallel() namespaceName := setupTest(ctx, t, c) - cluster := createCluster(ctx, t, c, namespaceName, nil) - machine := createMachine(ctx, t, c, cluster) - talosConfig := createTalosConfig(ctx, t, c, machine, bootstrapv1alpha3.TalosConfigSpec{ + cluster := createCluster(ctx, t, c, namespaceName, nil, true) + talosConfig := createTalosConfig(ctx, t, c, namespaceName, bootstrapv1alpha3.TalosConfigSpec{ GenerateType: talosmachine.TypeInit.String(), }) + createMachine(ctx, t, c, cluster, talosConfig) waitForReady(ctx, t, c, talosConfig) assertClientConfig(t, talosConfig) @@ -50,34 +54,36 @@ func TestIntegration(t *testing.T) { t.Parallel() namespaceName := setupTest(ctx, t, c) - cluster := createCluster(ctx, t, c, namespaceName, nil) + cluster := createCluster(ctx, t, c, namespaceName, nil, true) controlplanes := []*bootstrapv1alpha3.TalosConfig{} for i := 0; i < 3; i++ { - machine := createMachine(ctx, t, c, cluster) - machineType := talosmachine.TypeInit if i > 0 { machineType = talosmachine.TypeControlPlane } - controlplanes = append(controlplanes, createTalosConfig(ctx, t, c, machine, bootstrapv1alpha3.TalosConfigSpec{ + talosConfig := createTalosConfig(ctx, t, c, namespaceName, bootstrapv1alpha3.TalosConfigSpec{ GenerateType: machineType.String(), TalosVersion: TalosVersion, - })) + }) + createMachine(ctx, t, c, cluster, talosConfig) + + controlplanes = append(controlplanes, talosConfig) } workers := []*bootstrapv1alpha3.TalosConfig{} for i := 0; i < 4; i++ { - machine := createMachine(ctx, t, c, cluster) - - workers = append(workers, createTalosConfig(ctx, t, c, machine, bootstrapv1alpha3.TalosConfigSpec{ + talosConfig := createTalosConfig(ctx, t, c, namespaceName, bootstrapv1alpha3.TalosConfigSpec{ GenerateType: talosmachine.TypeWorker.String(), TalosVersion: TalosVersion, - })) + }) + createMachine(ctx, t, c, cluster, talosConfig) + + workers = append(workers, talosConfig) } for i, talosConfig := range 
append(append([]*bootstrapv1alpha3.TalosConfig{}, controlplanes...), workers...) { @@ -131,12 +137,12 @@ func TestIntegration(t *testing.T) { Host: "example.com", Port: 443, }, - }) - machine := createMachine(ctx, t, c, cluster) - talosConfig := createTalosConfig(ctx, t, c, machine, bootstrapv1alpha3.TalosConfigSpec{ + }, true) + talosConfig := createTalosConfig(ctx, t, c, namespaceName, bootstrapv1alpha3.TalosConfigSpec{ GenerateType: talosmachine.TypeInit.String(), TalosVersion: TalosVersion, }) + createMachine(ctx, t, c, cluster, talosConfig) waitForReady(ctx, t, c, talosConfig) provider := assertMachineConfiguration(ctx, t, c, talosConfig) @@ -151,9 +157,8 @@ func TestIntegration(t *testing.T) { t.Parallel() namespaceName := setupTest(ctx, t, c) - cluster := createCluster(ctx, t, c, namespaceName, nil) - machine := createMachine(ctx, t, c, cluster) - talosConfig := createTalosConfig(ctx, t, c, machine, bootstrapv1alpha3.TalosConfigSpec{ + cluster := createCluster(ctx, t, c, namespaceName, nil, true) + talosConfig := createTalosConfig(ctx, t, c, namespaceName, bootstrapv1alpha3.TalosConfigSpec{ GenerateType: talosmachine.TypeInit.String(), TalosVersion: TalosVersion, ConfigPatches: []bootstrapv1alpha3.ConfigPatches{ @@ -173,6 +178,7 @@ func TestIntegration(t *testing.T) { }, }, }) + createMachine(ctx, t, c, cluster, talosConfig) waitForReady(ctx, t, c, talosConfig) provider := assertMachineConfiguration(ctx, t, c, talosConfig) @@ -185,8 +191,7 @@ func TestIntegration(t *testing.T) { t.Parallel() namespaceName := setupTest(ctx, t, c) - cluster := createCluster(ctx, t, c, namespaceName, nil) - machine := createMachine(ctx, t, c, cluster) + cluster := createCluster(ctx, t, c, namespaceName, nil, true) // create a secret which imitates legacy secret format. 
clusterSecret := corev1.Secret{ @@ -202,10 +207,11 @@ func TestIntegration(t *testing.T) { require.NoError(t, json.Unmarshal([]byte(legacySecretData), &clusterSecret.Data)) require.NoError(t, c.Create(ctx, &clusterSecret)) - talosConfig := createTalosConfig(ctx, t, c, machine, bootstrapv1alpha3.TalosConfigSpec{ + talosConfig := createTalosConfig(ctx, t, c, namespaceName, bootstrapv1alpha3.TalosConfigSpec{ GenerateType: talosmachine.TypeControlPlane.String(), TalosVersion: TalosVersion, }) + createMachine(ctx, t, c, cluster, talosConfig) waitForReady(ctx, t, c, talosConfig) provider := assertMachineConfiguration(ctx, t, c, talosConfig) @@ -223,7 +229,7 @@ func TestIntegration(t *testing.T) { t.Parallel() namespaceName := setupTest(ctx, t, c) - cluster := createCluster(ctx, t, c, namespaceName, nil) + cluster := createCluster(ctx, t, c, namespaceName, nil, true) secretsBundle, err := generate.NewSecretsBundle(generate.NewClock()) require.NoError(t, err) @@ -234,7 +240,6 @@ func TestIntegration(t *testing.T) { workers := []*bootstrapv1alpha3.TalosConfig{} for i := 0; i < 4; i++ { - machine := createMachine(ctx, t, c, cluster) machineconfig, err := generate.Config(talosmachine.TypeWorker, input) require.NoError(t, err) @@ -242,16 +247,18 @@ func TestIntegration(t *testing.T) { configdata, err := machineconfig.Bytes() require.NoError(t, err) - workers = append(workers, createTalosConfig(ctx, t, c, machine, bootstrapv1alpha3.TalosConfigSpec{ + talosConfig := createTalosConfig(ctx, t, c, namespaceName, bootstrapv1alpha3.TalosConfigSpec{ GenerateType: "none", Data: string(configdata), - })) + }) + createMachine(ctx, t, c, cluster, talosConfig) + + workers = append(workers, talosConfig) } controlplanes := []*bootstrapv1alpha3.TalosConfig{} for i := 0; i < 3; i++ { - machine := createMachine(ctx, t, c, cluster) machineType := talosmachine.TypeInit @@ -265,10 +272,13 @@ func TestIntegration(t *testing.T) { configdata, err := machineconfig.Bytes() require.NoError(t, err) - controlplanes = append(controlplanes, createTalosConfig(ctx, t, c, machine, bootstrapv1alpha3.TalosConfigSpec{ + talosConfig := createTalosConfig(ctx, t, c, namespaceName, bootstrapv1alpha3.TalosConfigSpec{ GenerateType: "none", Data: string(configdata), - })) + }) + createMachine(ctx, t, c, cluster, talosConfig) + + controlplanes = append(controlplanes, talosConfig) } for i, talosConfig := range append(append([]*bootstrapv1alpha3.TalosConfig{}, controlplanes...), workers...) { @@ -297,6 +307,106 @@ func TestIntegration(t *testing.T) { assertCompatibleMachineConfigs(ctx, t, c, append(append([]*bootstrapv1alpha3.TalosConfig{}, controlplanes...), workers...)...) 
}) + t.Run("InfrastructureNotReady", func(t *testing.T) { + t.Parallel() + + namespaceName := setupTest(ctx, t, c) + cluster := createCluster(ctx, t, c, namespaceName, nil, false) + talosConfig := createTalosConfig(ctx, t, c, namespaceName, bootstrapv1alpha3.TalosConfigSpec{ + GenerateType: talosmachine.TypeInit.String(), + }) + createMachine(ctx, t, c, cluster, talosConfig) + + // assert that controller reports condition + + for ctx.Err() == nil { + key := types.NamespacedName{ + Namespace: talosConfig.Namespace, + Name: talosConfig.Name, + } + + err := c.Get(ctx, key, talosConfig) + require.NoError(t, err) + + if conditions.IsFalse(talosConfig, bootstrapv1alpha3.DataSecretAvailableCondition) && + conditions.GetReason(talosConfig, bootstrapv1alpha3.DataSecretAvailableCondition) == bootstrapv1alpha3.WaitingForClusterInfrastructureReason { + break + } + + t.Log("Waiting ...") + sleepCtx(ctx, 3*time.Second) + } + + require.NoError(t, ctx.Err()) + + assert.Equal(t, capiv1.ConditionSeverityInfo, *conditions.GetSeverity(talosConfig, bootstrapv1alpha3.DataSecretAvailableCondition)) + + // patch to make infrastructure ready + patchHelper, err := patch.NewHelper(cluster, c) + require.NoError(t, err) + + cluster.Status.InfrastructureReady = true + require.NoError(t, patchHelper.Patch(ctx, cluster)) + + waitForReady(ctx, t, c, talosConfig) + + assertClientConfig(t, talosConfig) + + provider := assertMachineConfiguration(ctx, t, c, talosConfig) + + assert.Equal(t, talosmachine.TypeInit, provider.Machine().Type()) + + assertClusterCA(ctx, t, c, cluster, provider) + + assertControllerSecret(ctx, t, c, cluster, provider) + }) + + t.Run("BadConfigPatch", func(t *testing.T) { + t.Parallel() + + namespaceName := setupTest(ctx, t, c) + cluster := createCluster(ctx, t, c, namespaceName, nil, true) + talosConfig := createTalosConfig(ctx, t, c, namespaceName, bootstrapv1alpha3.TalosConfigSpec{ + GenerateType: talosmachine.TypeInit.String(), + TalosVersion: TalosVersion, + ConfigPatches: []bootstrapv1alpha3.ConfigPatches{ + { + Op: "add", + Path: "/machine/time/servers", + Value: apiextensions.JSON{ + Raw: []byte(`["time.cloudflare.com"]`), + }, + }, + }, + }) + createMachine(ctx, t, c, cluster, talosConfig) + + // assert that controller reports failure condition + for ctx.Err() == nil { + key := types.NamespacedName{ + Namespace: talosConfig.Namespace, + Name: talosConfig.Name, + } + + err := c.Get(ctx, key, talosConfig) + require.NoError(t, err) + + if conditions.IsFalse(talosConfig, bootstrapv1alpha3.DataSecretAvailableCondition) && + conditions.GetReason(talosConfig, bootstrapv1alpha3.DataSecretAvailableCondition) == bootstrapv1alpha3.DataSecretGenerationFailedReason { + break + } + + t.Log("Waiting ...") + sleepCtx(ctx, 3*time.Second) + } + + require.NoError(t, ctx.Err()) + + assert.Equal(t, capiv1.ConditionSeverityError, *conditions.GetSeverity(talosConfig, bootstrapv1alpha3.DataSecretAvailableCondition)) + assert.Equal(t, + "failure applying rfc6902 patches to talos machine config: add operation does not apply: doc is missing path: \"/machine/time/servers\": missing value", + conditions.GetMessage(talosConfig, bootstrapv1alpha3.DataSecretAvailableCondition)) + }) } // legacy cluster secret format diff --git a/internal/integration/setup_test.go b/internal/integration/setup_test.go index 2e2a48f..d5c93fe 100644 --- a/internal/integration/setup_test.go +++ b/internal/integration/setup_test.go @@ -131,7 +131,7 @@ func setupTest(ctx context.Context, t *testing.T, c client.Client) string { // this allows us to 
override that removing the finalizer(s) var machineList capiv1.MachineList - err = c.List(ctx, &machineList, client.InNamespace(namespace)) + err = c.List(context.Background(), &machineList, client.InNamespace(namespace)) if err != nil { t.Log("error listing machines", err) @@ -141,7 +141,7 @@ func setupTest(ctx context.Context, t *testing.T, c client.Client) string { for _, machine := range machineList.Items { machine.Finalizers = nil - if err = c.Update(ctx, &machine); err != nil { + if err = c.Update(context.Background(), &machine); err != nil { // conflicts might be ignored here, as eventually this will succeed t.Log("error updating machine's finalizers", err) } @@ -218,7 +218,7 @@ func startTestEnv(ctx context.Context, t *testing.T) *rest.Config { t.Helper() testEnv := &envtest.Environment{ - CRDDirectoryPaths: []string{filepath.Join("..", "..", "config", "crd", "bases")}, + CRDDirectoryPaths: []string{filepath.Join("..", "..", Artifacts, "bootstrap-talos", Tag)}, CRDInstallOptions: envtest.CRDInstallOptions{ ErrorIfPathMissing: true, MaxTime: 20 * time.Second, diff --git a/main.go b/main.go index 0d421a7..30b6f0f 100644 --- a/main.go +++ b/main.go @@ -7,12 +7,14 @@ package main import ( "context" "flag" + "fmt" "math/rand" "os" "time" "github.com/spf13/pflag" cgrecord "k8s.io/client-go/tools/record" + capiv1 "sigs.k8s.io/cluster-api/api/v1alpha4" "sigs.k8s.io/cluster-api/feature" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/controller" @@ -29,6 +31,7 @@ var ( metricsAddr string enableLeaderElection bool webhookPort int + watchFilterValue string ) func InitFlags(fs *pflag.FlagSet) { @@ -41,6 +44,9 @@ func InitFlags(fs *pflag.FlagSet) { fs.IntVar(&webhookPort, "webhook-port", 9443, "Webhook Server port, disabled by default. When enabled, the manager will only work as webhook server, no reconcilers are installed.") + fs.StringVar(&watchFilterValue, "watch-filter", "", + fmt.Sprintf("Label value that the controller watches to reconcile cluster-api objects. Label key is always %s. If unspecified, the controller watches for all cluster-api objects.", capiv1.WatchLabel)) + feature.MutableGates.AddFlag(fs) } @@ -76,9 +82,10 @@ func main() { ctx := context.Background() if err = (&controllers.TalosConfigReconciler{ - Client: mgr.GetClient(), - Log: ctrl.Log.WithName("controllers").WithName("TalosConfig"), - Scheme: mgr.GetScheme(), + Client: mgr.GetClient(), + Log: ctrl.Log.WithName("controllers").WithName("TalosConfig"), + Scheme: mgr.GetScheme(), + WatchFilterValue: watchFilterValue, }).SetupWithManager(ctx, mgr, controller.Options{MaxConcurrentReconciles: 10}); err != nil { setupLog.Error(err, "unable to create controller", "controller", "TalosConfig") os.Exit(1)
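
Note: with the conditions added above, the reason a TalosConfig is stuck becomes visible on the resource itself, e.g. via `kubectl get talosconfig talos-0 -o yaml`. A hypothetical status rendering for a cluster whose infrastructure is not yet ready (the timestamp and generation values are illustrative):

```yaml
status:
  conditions:
  # summary condition produced by conditions.SetSummary in the reconcile defer
  - lastTransitionTime: "2021-10-05T13:38:13Z"
    reason: WaitingForClusterInfrastructure
    severity: Info
    status: "False"
    type: Ready
  # condition set by conditions.MarkFalse when cluster.Status.InfrastructureReady is false
  - lastTransitionTime: "2021-10-05T13:38:13Z"
    reason: WaitingForClusterInfrastructure
    severity: Info
    status: "False"
    type: DataSecretAvailable
  observedGeneration: 1
```

Once the Cluster's `InfrastructureReady` flips to true, the watch on Cluster objects requeues the TalosConfig and `DataSecretAvailable` transitions to `True`, so the failure reason can be read from the resource instead of the controller logs.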