diff --git a/README.md b/README.md index 597cd5a..3adf405 100644 --- a/README.md +++ b/README.md @@ -45,9 +45,12 @@ This provider's versions are compatible with the following versions of Talos: This control plane provider can be installed with clusterctl: ```bash -clusterctl init -c talos -b talos +clusterctl init -c talos -b talos -i ``` +If you are going to use this provider as part of Sidero management plane, please refer to [Sidero Docs](https://www.sidero.dev/docs/v0.4/getting-started/install-clusterapi/) +on how to install and configure it. + This project can be built simply by running `make release` from the root directory. Doing so will create a file called `_out/control-plane-components.yaml`. If you wish, you can tweak settings by editing the release yaml. @@ -58,8 +61,76 @@ You will need at least the upstream CAPI components, the Talos bootstrap provide ## Usage +### Supported Templates + You can use recommended [Cluster API templates](https://github.com/talos-systems/cluster-api-templates) provided by Sidero Labs. + It contains templates for `AWS` and `GCP`, which are verified by the integration tests. -If you are going to use this provider as part of Sidero management plane, please refer to [Sidero Docs](https://www.sidero.dev/docs/v0.4/getting-started/install-clusterapi/) -on how to install and configure it. +### Creating Your Own Templates + +If you wish to craft your own manifests, here is some important info. + +CACPPT supports a single API type, a TalosControlPlane. +You can create YAML definitions of a TalosControlPlane and `kubectl apply` them as part of a larger CAPI cluster deployment. +Below is a bare-minimum example. 
+ +A basic config: + +```yaml +apiVersion: controlplane.cluster.x-k8s.io/v1alpha3 +kind: TalosControlPlane +metadata: + name: talos-cp +spec: + version: v1.18.1 + replicas: 1 + infrastructureTemplate: + kind: MetalMachineTemplate + apiVersion: infrastructure.cluster.x-k8s.io/v1alpha3 + name: talos-cp + controlPlaneConfig: + controlplane: + generateType: controlplane +``` + +Note you must provide an infrastructure template for your control plane. +See your infrastructure provider for how to craft that. + +Note the generateType mentioned above. +This is a required value in the spec for both controlplane and worker ("join") nodes. +For a no-frills control plane config, you can simply specify `controlplane` depending on each config section. +When creating a TalosControlPlane this way, you can then retrieve the talosconfig file that allows for osctl interaction with your nodes by doing something like `kubectl get talosconfig -o yaml talos-cp-xxxx -o jsonpath='{.status.talosConfig}'` after creation. + +If you wish to do something more complex, we allow for the ability to supply an entire Talos machine config file to the resource. +This can be done by setting the generateType to `none` and specifying a `data` field. +This config file can be generated with `talosctl config generate` and then edited to supply the various options you may desire. +This full config is blindly copied from the `data` section of the spec and presented under `.status.controlPlaneData` so that the upstream CAPI controllers can see it and make use of it. + +An example of a more complex config: + +```yaml +apiVersion: control-plane.cluster.x-k8s.io/v1alpha2 +kind: TalosControlPlane +metadata: + name: talos-0 + labels: + cluster.x-k8s.io/cluster-name: talos +spec: + controlPlaneConfig: + init: + generateType: none + data: | + version: v1alpha1 + machine: + type: controlplane + token: xxxxxx + ... + ... + ... + ... + ... 
+``` + +Note that specifying the full config above removes the ability for our control plane provider to generate a talosconfig for use. +As such, you should keep track of the talosconfig that's generated when running `talosctl config generate`. diff --git a/controllers/taloscontrolplane_controller.go b/controllers/taloscontrolplane_controller.go index 9cb4b2d..89b18d0 100644 --- a/controllers/taloscontrolplane_controller.go +++ b/controllers/taloscontrolplane_controller.go @@ -7,6 +7,7 @@ package controllers import ( "context" "fmt" + "io" "math/rand" "reflect" "sort" @@ -256,7 +257,7 @@ func (r *TalosControlPlaneReconciler) Reconcile(ctx context.Context, req ctrl.Re // Create new Machine w/ init logger.Info("Initializing control plane", "Desired", desiredReplicas, "Existing", numMachines) - return r.bootControlPlane(ctx, cluster, tcp, controlPlane) + return r.bootControlPlane(ctx, cluster, tcp, controlPlane, true) // We are scaling up case numMachines < desiredReplicas && numMachines > 0: conditions.MarkFalse(tcp, controlplanev1.ResizedCondition, controlplanev1.ScalingUpReason, clusterv1.ConditionSeverityWarning, @@ -266,7 +267,7 @@ func (r *TalosControlPlaneReconciler) Reconcile(ctx context.Context, req ctrl.Re // Create a new Machine w/ join logger.Info("Scaling up control plane", "Desired", desiredReplicas, "Existing", numMachines) - return r.bootControlPlane(ctx, cluster, tcp, controlPlane) + return r.bootControlPlane(ctx, cluster, tcp, controlPlane, false) // We are scaling down case numMachines > desiredReplicas: conditions.MarkFalse(tcp, controlplanev1.ResizedCondition, controlplanev1.ScalingDownReason, clusterv1.ConditionSeverityWarning, @@ -306,15 +307,7 @@ func (r *TalosControlPlaneReconciler) Reconcile(ctx context.Context, req ctrl.Re return res, err default: - if !tcp.Status.Bootstrapped { - if !reflect.ValueOf(tcp.Spec.ControlPlaneConfig.InitConfig).IsZero() { - reason := "spec.controlPlaneConfig.init config is deprecated, to fix it keep controlplane 
config only" - - conditions.MarkFalse(tcp, controlplanev1.MachinesBootstrapped, controlplanev1.InvalidControlPlaneConfigReason, clusterv1.ConditionSeverityError, reason) - - return ctrl.Result{}, fmt.Errorf(reason) - } - + if !tcp.Status.Bootstrapped && reflect.ValueOf(tcp.Spec.ControlPlaneConfig.InitConfig).IsZero() { if err := r.bootstrapCluster(ctx, cluster, ownedMachines); err != nil { conditions.MarkFalse(tcp, controlplanev1.MachinesBootstrapped, controlplanev1.WaitingForTalosBootReason, clusterv1.ConditionSeverityInfo, err.Error()) @@ -548,7 +541,7 @@ func (r *TalosControlPlaneReconciler) getFailureDomain(ctx context.Context, clus return retList } -func (r *TalosControlPlaneReconciler) bootControlPlane(ctx context.Context, cluster *clusterv1.Cluster, tcp *controlplanev1.TalosControlPlane, controlPlane *ControlPlane) (ctrl.Result, error) { +func (r *TalosControlPlaneReconciler) bootControlPlane(ctx context.Context, cluster *clusterv1.Cluster, tcp *controlplanev1.TalosControlPlane, controlPlane *ControlPlane, first bool) (ctrl.Result, error) { // Since the cloned resource should eventually have a controller ref for the Machine, we create an // OwnerReference here without the Controller field set infraCloneOwner := &metav1.OwnerReference{ @@ -574,6 +567,9 @@ func (r *TalosControlPlaneReconciler) bootControlPlane(ctx context.Context, clus } bootstrapConfig := &tcp.Spec.ControlPlaneConfig.ControlPlaneConfig + if !reflect.ValueOf(tcp.Spec.ControlPlaneConfig.InitConfig).IsZero() && first { + bootstrapConfig = &tcp.Spec.ControlPlaneConfig.InitConfig + } // Clone the bootstrap configuration bootstrapRef, err := r.generateTalosConfig(ctx, tcp, cluster, bootstrapConfig) @@ -648,6 +644,28 @@ func (r *TalosControlPlaneReconciler) bootstrapCluster(ctx context.Context, clus return fmt.Errorf("no machine addresses to use for bootstrap") } + list, err := c.LS(talosclient.WithNodes(ctx, addresses...), &machineapi.ListRequest{Root: "/var/lib/etcd/member"}) + if err != nil { + 
return err + } + + for { + info, err := list.Recv() + if err != nil { + if errors.Is(err, io.EOF) || talosclient.StatusCode(err) == codes.Canceled { + break + } + + return err + } + + // if the directory exists at least on a single node it means that cluster + // was already bootstrapped + if info.Metadata.Error == "" { + return nil + } + } + sort.Strings(addresses) if err := c.Bootstrap(talosclient.WithNodes(ctx, addresses[0]), &machineapi.BootstrapRequest{}); err != nil { diff --git a/internal/integration/integration_test.go b/internal/integration/integration_test.go index 2ac1da4..2055eb5 100644 --- a/internal/integration/integration_test.go +++ b/internal/integration/integration_test.go @@ -151,6 +151,7 @@ func (suite *IntegrationSuite) SetupSuite() { capi.WithProvider(provider.Name()), capi.WithKubernetesVersion(strings.TrimLeft(env("WORKLOAD_KUBERNETES_VERSION", env("K8S_VERSION", "v1.22.2")), "v")), capi.WithTemplateFile("https://github.com/talos-systems/cluster-api-templates/blob/v1beta1/aws/standard/standard.yaml"), + capi.WithControlPlaneNodes(3), ) suite.Require().NoError(err) @@ -168,17 +169,8 @@ func (suite *IntegrationSuite) TearDownSuite() { } } -// Test01ScaleUp scale control plane nodes up. -func (suite *IntegrationSuite) Test01ScaleUp() { - suite.cluster.Scale(suite.ctx, 3, capi.ControlPlaneNodes) //nolint:errcheck - - suite.Require().NoError(suite.cluster.Health(suite.ctx)) - - time.Sleep(2 * time.Second) -} - -// Test02ReconcileMachine remove a machine and wait until cluster gets healthy again. -func (suite *IntegrationSuite) Test02ReconcileMachine() { +// Test01ReconcileMachine remove a machine and wait until cluster gets healthy again. +func (suite *IntegrationSuite) Test01ReconcileMachine() { selector, err := labels.Parse("cluster.x-k8s.io/control-plane") suite.Require().NoError(err) @@ -262,7 +254,7 @@ func (suite *IntegrationSuite) Test02ReconcileMachine() { ) } -// Test03ScaleDown scale control planes down. 
+// Test02ScaleDown scale control planes down. func (suite *IntegrationSuite) Test03ScaleDown() { suite.Require().NoError(suite.cluster.Sync(suite.ctx)) @@ -274,7 +266,7 @@ func (suite *IntegrationSuite) Test03ScaleDown() { suite.Require().NoError(suite.cluster.Sync(suite.ctx)) } -// Test04ScaleControlPlaneNoWait scale control plane nodes up and down without waiting. +// Test03ScaleControlPlaneNoWait scale control plane nodes up and down without waiting. func (suite *IntegrationSuite) Test04ScaleControlPlaneNoWait() { ctx, cancel := context.WithCancel(suite.ctx) @@ -290,7 +282,7 @@ func (suite *IntegrationSuite) Test04ScaleControlPlaneNoWait() { suite.Require().NoError(err) } -// Test05ScaleControlPlaneToZero try to scale control plane to zero and check that it never does that. +// Test04ScaleControlPlaneToZero try to scale control plane to zero and check that it never does that. func (suite *IntegrationSuite) Test05ScaleControlPlaneToZero() { ctx, cancel := context.WithCancel(suite.ctx)