From 6c81518b172ba09efbc46f15e33706d7340cdecf Mon Sep 17 00:00:00 2001 From: Andrey Smirnov Date: Wed, 13 Apr 2022 23:20:32 +0300 Subject: [PATCH] docs: fork docs for Sidero 0.6 Revert changes for deployment strategy, as they're not actually in 0.5. Signed-off-by: Andrey Smirnov --- website/config.toml | 10 +- .../Getting Started/install-clusterapi.md | 1 - website/content/v0.5/Guides/bootstrapping.md | 1 - website/content/v0.5/Guides/sidero-on-rpi4.md | 2 +- website/content/v0.5/Overview/installation.md | 2 +- .../content/v0.6/Getting Started/_index.md | 4 + .../v0.6/Getting Started/create-workload.md | 125 ++++++++ .../v0.6/Getting Started/expose-services.md | 41 +++ .../v0.6/Getting Started/import-machines.md | 73 +++++ website/content/v0.6/Getting Started/index.md | 61 ++++ .../Getting Started/install-clusterapi.md | 69 +++++ website/content/v0.6/Getting Started/pivot.md | 44 +++ .../v0.6/Getting Started/prereq-cli-tools.md | 60 ++++ .../v0.6/Getting Started/prereq-dhcp.md | 145 +++++++++ .../v0.6/Getting Started/prereq-kubernetes.md | 87 ++++++ .../v0.6/Getting Started/scale-workload.md | 14 + .../v0.6/Getting Started/troubleshooting.md | 77 +++++ website/content/v0.6/Guides/_index.md | 4 + website/content/v0.6/Guides/bootstrapping.md | 284 ++++++++++++++++++ .../content/v0.6/Guides/decommissioning.md | 26 ++ website/content/v0.6/Guides/first-cluster.md | 150 +++++++++ website/content/v0.6/Guides/flow.md | 81 +++++ website/content/v0.6/Guides/iso.md | 23 ++ website/content/v0.6/Guides/patching.md | 57 ++++ .../content/v0.6/Guides/rpi4-as-servers.md | 268 +++++++++++++++++ website/content/v0.6/Guides/sidero-on-rpi4.md | 158 ++++++++++ website/content/v0.6/Guides/upgrades.md | 66 ++++ website/content/v0.6/Overview/_index.md | 4 + website/content/v0.6/Overview/architecture.md | 11 + website/content/v0.6/Overview/installation.md | 42 +++ website/content/v0.6/Overview/introduction.md | 29 ++ .../v0.6/Overview/minimum-requirements.md | 20 ++ website/content/v0.6/Overview/resources.md | 154 ++++++++++ website/content/v0.6/Overview/siderolink.md | 124 ++++++++ website/content/v0.6/Overview/whatsnew.md | 65 ++++ .../v0.6/Resource Configuration/_index.md | 4 + .../Resource Configuration/environments.md | 76 +++++ .../v0.6/Resource Configuration/metadata.md | 167 ++++++++++ .../Resource Configuration/serverclasses.md | 117 ++++++++ .../v0.6/Resource Configuration/servers.md | 183 +++++++++++ website/content/v0.6/_index.md | 27 ++ .../layouts/partials/version-banner.html | 4 +- 42 files changed, 2951 insertions(+), 9 deletions(-) create mode 100644 website/content/v0.6/Getting Started/_index.md create mode 100644 website/content/v0.6/Getting Started/create-workload.md create mode 100644 website/content/v0.6/Getting Started/expose-services.md create mode 100644 website/content/v0.6/Getting Started/import-machines.md create mode 100644 website/content/v0.6/Getting Started/index.md create mode 100644 website/content/v0.6/Getting Started/install-clusterapi.md create mode 100644 website/content/v0.6/Getting Started/pivot.md create mode 100644 website/content/v0.6/Getting Started/prereq-cli-tools.md create mode 100644 website/content/v0.6/Getting Started/prereq-dhcp.md create mode 100644 website/content/v0.6/Getting Started/prereq-kubernetes.md create mode 100644 website/content/v0.6/Getting Started/scale-workload.md create mode 100644 website/content/v0.6/Getting Started/troubleshooting.md create mode 100644 website/content/v0.6/Guides/_index.md create mode 100644 
website/content/v0.6/Guides/bootstrapping.md create mode 100644 website/content/v0.6/Guides/decommissioning.md create mode 100644 website/content/v0.6/Guides/first-cluster.md create mode 100644 website/content/v0.6/Guides/flow.md create mode 100644 website/content/v0.6/Guides/iso.md create mode 100644 website/content/v0.6/Guides/patching.md create mode 100644 website/content/v0.6/Guides/rpi4-as-servers.md create mode 100644 website/content/v0.6/Guides/sidero-on-rpi4.md create mode 100644 website/content/v0.6/Guides/upgrades.md create mode 100644 website/content/v0.6/Overview/_index.md create mode 100644 website/content/v0.6/Overview/architecture.md create mode 100644 website/content/v0.6/Overview/installation.md create mode 100755 website/content/v0.6/Overview/introduction.md create mode 100644 website/content/v0.6/Overview/minimum-requirements.md create mode 100644 website/content/v0.6/Overview/resources.md create mode 100644 website/content/v0.6/Overview/siderolink.md create mode 100644 website/content/v0.6/Overview/whatsnew.md create mode 100644 website/content/v0.6/Resource Configuration/_index.md create mode 100644 website/content/v0.6/Resource Configuration/environments.md create mode 100644 website/content/v0.6/Resource Configuration/metadata.md create mode 100644 website/content/v0.6/Resource Configuration/serverclasses.md create mode 100644 website/content/v0.6/Resource Configuration/servers.md create mode 100644 website/content/v0.6/_index.md diff --git a/website/config.toml b/website/config.toml index a80e53096..0ff98b4d2 100644 --- a/website/config.toml +++ b/website/config.toml @@ -85,13 +85,13 @@ copyright = "Sidero Labs, Inc." # This menu appears only if you have at least one [params.versions] set. version_menu = "Releases" -# Flag used in the "version-banner" partial to decide whether to display a +# Flag used in the "version-banner" partial to decide whether to display a # banner on every page indicating that this is an archived version of the docs. # Set this flag to "true" if you want to display the banner. # archived_version = false # The version number for the version of the docs represented in this doc set. -# Used in the "version-banner" partial to display a version number for the +# Used in the "version-banner" partial to display a version number for the # current doc set. # version = "0.6" @@ -124,6 +124,10 @@ offlineSearch = false # Enable syntax highlighting and copy buttons on code blocks with Prism prism_syntax_highlighting = false +[[params.versions]] +url = "/v0.6" +version = "v0.6 (pre-release)" + [[params.versions]] url = "/v0.5" version = "v0.5 (latest)" @@ -170,7 +174,7 @@ no = 'Sorry to hear that. Please tell us how we can improve.' # Adds a reading time to the top of each doc. -# If you want this feature, but occasionally need to remove the Reading time from a single page, +# If you want this feature, but occasionally need to remove the Reading time from a single page, # add "hide_readingtime: true" to the page's front matter [params.ui.readingtime] enable = false diff --git a/website/content/v0.5/Getting Started/install-clusterapi.md b/website/content/v0.5/Getting Started/install-clusterapi.md index 0e3f06804..f1531216a 100644 --- a/website/content/v0.5/Getting Started/install-clusterapi.md +++ b/website/content/v0.5/Getting Started/install-clusterapi.md @@ -24,7 +24,6 @@ options. 
```bash export SIDERO_CONTROLLER_MANAGER_HOST_NETWORK=true -export SIDERO_CONTROLLER_MANAGER_DEPLOYMENT_STRATEGY=Recreate export SIDERO_CONTROLLER_MANAGER_API_ENDPOINT=192.168.1.150 export SIDERO_CONTROLLER_MANAGER_SIDEROLINK_ENDPOINT=192.168.1.150 diff --git a/website/content/v0.5/Guides/bootstrapping.md b/website/content/v0.5/Guides/bootstrapping.md index bbb56fb3b..418f10641 100644 --- a/website/content/v0.5/Guides/bootstrapping.md +++ b/website/content/v0.5/Guides/bootstrapping.md @@ -147,7 +147,6 @@ To install Sidero and the other Talos providers, simply issue: ```bash SIDERO_CONTROLLER_MANAGER_HOST_NETWORK=true \ - SIDERO_CONTROLLER_MANAGER_DEPLOYMENT_STRATEGY=Recreate \ SIDERO_CONTROLLER_MANAGER_API_ENDPOINT=$PUBLIC_IP \ clusterctl init -b talos -c talos -i sidero ``` diff --git a/website/content/v0.5/Guides/sidero-on-rpi4.md b/website/content/v0.5/Guides/sidero-on-rpi4.md index 96c095efe..da0be73b6 100644 --- a/website/content/v0.5/Guides/sidero-on-rpi4.md +++ b/website/content/v0.5/Guides/sidero-on-rpi4.md @@ -104,7 +104,7 @@ kubectl get nodes Install Sidero with host network mode, exposing the endpoints on the node's address: ```bash -SIDERO_CONTROLLER_MANAGER_HOST_NETWORK=true SIDERO_CONTROLLER_MANAGER_DEPLOYMENT_STRATEGY=Recreate SIDERO_CONTROLLER_MANAGER_API_ENDPOINT=${SIDERO_IP} clusterctl init -i sidero -b talos -c talos +SIDERO_CONTROLLER_MANAGER_HOST_NETWORK=true SIDERO_CONTROLLER_MANAGER_API_ENDPOINT=${SIDERO_IP} clusterctl init -i sidero -b talos -c talos ``` Watch the progress of installation with: diff --git a/website/content/v0.5/Overview/installation.md b/website/content/v0.5/Overview/installation.md index 82d5e3f3c..ce5e2dff0 100644 --- a/website/content/v0.5/Overview/installation.md +++ b/website/content/v0.5/Overview/installation.md @@ -14,7 +14,7 @@ Sidero supports several variables to configure the installation, these variables variables or as variables in the `clusterctl` configuration: - `SIDERO_CONTROLLER_MANAGER_HOST_NETWORK` (`false`): run `sidero-controller-manager` on host network -- `SIDERO_CONTROLLER_MANAGER_DEPLOYMENT_STRATEGY` (`RollingUpdate`): strategy to use when updating `sidero-controller-manager`, use `Recreate` when using a single node and `SIDERO_CONTROLLER_MANAGER_HOST_NETWORK` is `true` +`SIDERO_CONTROLLER_MANAGER_HOST_NETWORK` is `true` - `SIDERO_CONTROLLER_MANAGER_API_ENDPOINT` (empty): specifies the IP address controller manager API service can be reached on, defaults to the node IP (TCP) - `SIDERO_CONTROLLER_MANAGER_API_PORT` (8081): specifies the port controller manager can be reached on - `SIDERO_CONTROLLER_MANAGER_CONTAINER_API_PORT` (8081): specifies the controller manager internal container port diff --git a/website/content/v0.6/Getting Started/_index.md b/website/content/v0.6/Getting Started/_index.md new file mode 100644 index 000000000..eefe52f79 --- /dev/null +++ b/website/content/v0.6/Getting Started/_index.md @@ -0,0 +1,4 @@ +--- +title: "Getting Started" +weight: 20 +--- \ No newline at end of file diff --git a/website/content/v0.6/Getting Started/create-workload.md b/website/content/v0.6/Getting Started/create-workload.md new file mode 100644 index 000000000..239e38de3 --- /dev/null +++ b/website/content/v0.6/Getting Started/create-workload.md @@ -0,0 +1,125 @@ +--- +description: "Create a Workload Cluster" +weight: 8 +title: "Create a Workload Cluster" +--- + +Once created and accepted, you should see the servers that make up your ServerClasses appear as "available": + +```bash +$ kubectl get serverclass +NAME AVAILABLE IN 
USE +any ["00000000-0000-0000-0000-d05099d33360"] [] +``` + +## Generate Cluster Manifests + +We are now ready to generate the configuration manifest templates for our first workload +cluster. + +There are several configuration parameters that should be set in order for the templating to work properly: + +- `CONTROL_PLANE_ENDPOINT`: The endpoint used for the Kubernetes API server (e.g. `https://1.2.3.4:6443`). + This is the equivalent of the `endpoint` you would specify in `talosctl gen config`. + There are a variety of ways to configure a control plane endpoint. + Some common ways for an HA setup are to use DNS, a load balancer, or BGP. + A simpler method is to use the IP of a single node. + This has the disadvantage of being a single point of failure, but it can be a simple way to get running. +- `CONTROL_PLANE_SERVERCLASS`: The server class to use for control plane nodes. +- `WORKER_SERVERCLASS`: The server class to use for worker nodes. +- `KUBERNETES_VERSION`: The version of Kubernetes to deploy (e.g. `v1.21.1`). +- `CONTROL_PLANE_PORT`: The port used for the Kubernetes API server (port 6443) + +For instance: + +```bash +export CONTROL_PLANE_SERVERCLASS=any +export WORKER_SERVERCLASS=any +export TALOS_VERSION=v0.14.0 +export KUBERNETES_VERSION=v1.22.2 +export CONTROL_PLANE_PORT=6443 +export CONTROL_PLANE_ENDPOINT=1.2.3.4 + +clusterctl generate cluster cluster-0 -i sidero > cluster-0.yaml +``` + +Take a look at this new `cluster-0.yaml` manifest and make any changes as you +see fit. +Feel free to adjust the `replicas` field of the `TalosControlPlane` and `MachineDeployment` objects to match the number of machines you want in your controlplane and worker sets, respecively. +`MachineDeployment` (worker) count is allowed to be 0. + +Of course, these may also be scaled up or down _after_ they have been created, +as well. + +## Create the Cluster + +When you are satisfied with your configuration, go ahead and apply it to Sidero: + +```bash +kubectl apply -f cluster-0.yaml +``` + +At this point, Sidero will allocate Servers according to the requests in the +cluster manifest. +Once allocated, each of those machines will be installed with Talos, given their +configuration, and form a cluster. + +You can watch the progress of the Servers being selected: + +```bash +watch kubectl --context=sidero-demo \ + get servers,machines,clusters +``` + +First, you should see the Cluster created in the `Provisioning` phase. +Once the Cluster is `Provisioned`, a Machine will be created in the +`Provisioning` phase. + +![machine provisioning](/images/sidero-cluster-start.png) + +During the `Provisioning` phase, a Server will become allocated, the hardware +will be powered up, Talos will be installed onto it, and it will be rebooted +into Talos. +Depending on the hardware involved, this may take several minutes. + +Eventually, the Machine should reach the `Running` phase. + +![machine_running](/images/sidero-cluster-up.png) + +The initial controlplane Machine will always be started first. +Any additional nodes will be started after that and will join the cluster when +they are ready. + +## Retrieve the Talosconfig + +In order to interact with the new machines (outside of Kubernetes), you will +need to obtain the `talosctl` client configuration, or `talosconfig`. 
+You can do this by retrieving the secret from the Sidero
+management cluster:
+
+```bash
+kubectl --context=sidero-demo \
+  get secret \
+  cluster-0-talosconfig \
+  -o jsonpath='{.data.talosconfig}' \
+  | base64 -d \
+  > cluster-0-talosconfig
+```
+
+## Retrieve the Kubeconfig
+
+With the talosconfig obtained, the workload cluster's kubeconfig can be retrieved in the normal Talos way:
+
+```bash
+talosctl --talosconfig cluster-0-talosconfig --nodes <control-plane-node-IP> kubeconfig
+```
+
+## Check access
+
+Now, you should have two clusters available: your management cluster
+(`sidero-demo`) and your workload cluster (`cluster-0`).
+
+```bash
+kubectl --context=sidero-demo get nodes
+kubectl --context=cluster-0 get nodes
+```
diff --git a/website/content/v0.6/Getting Started/expose-services.md
new file mode 100644
index 000000000..831d2c2bb
--- /dev/null
+++ b/website/content/v0.6/Getting Started/expose-services.md
@@ -0,0 +1,41 @@
+---
+description: "A guide for bootstrapping Sidero management plane"
+weight: 6
+title: "Expose Sidero Services"
+---
+
+> If you built your cluster as specified in the [Prerequisite: Kubernetes](../prereq-kubernetes) section in this tutorial, your services are already exposed and you can skip this section.
+
+There are three external Services which Sidero serves and which must be made
+reachable by the servers which it will be driving.
+
+For most servers, TFTP (port 69/udp) will be needed.
+This is used for PXE booting, both BIOS and UEFI.
+Being a primitive UDP protocol, many load balancers do not support TFTP.
+Instead, solutions such as [MetalLB](https://metallb.universe.tf) may be used to expose TFTP over a known IP address.
+For servers which support UEFI HTTP Network Boot, TFTP need not be used.
+
+The kernel, initrd, and all configuration assets are served from the HTTP service
+(port 8081/tcp).
+It is needed for all servers, but since it is HTTP-based, it
+can be easily proxied, load balanced, or run through an ingress controller.
+
+The SideroLink overlay WireGuard network requires UDP port 51821 to be open.
+As with TFTP, many load balancers do not support the WireGuard UDP protocol.
+Instead, use a solution such as MetalLB.
+
+The main thing to keep in mind is that the services **MUST** match the IP or
+hostname specified by the `SIDERO_CONTROLLER_MANAGER_API_ENDPOINT` and
+`SIDERO_CONTROLLER_MANAGER_SIDEROLINK_ENDPOINT` environment
+variables (or configuration parameters) when you installed Sidero.
+
+It is a good idea to verify that the services are exposed as you think they
+should be.
+
+```bash
+$ curl -I http://192.168.1.150:8081/tftp/ipxe.efi
+HTTP/1.1 200 OK
+Accept-Ranges: bytes
+Content-Length: 1020416
+Content-Type: application/octet-stream
+```
diff --git a/website/content/v0.6/Getting Started/import-machines.md
new file mode 100644
index 000000000..f0724ebd0
--- /dev/null
+++ b/website/content/v0.6/Getting Started/import-machines.md
@@ -0,0 +1,73 @@
+---
+description: "A guide for bootstrapping Sidero management plane"
+weight: 7
+title: "Import Workload Machines"
+---
+
+At this point, any servers on the same network as Sidero should network boot from Sidero.
+To register a server with Sidero, simply turn it on and Sidero will do the rest.
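+If you would like to follow along while machines come up, you can leave a watch
+running so that each new `Server` resource appears as soon as its registration
+finishes (a small sketch; it assumes your current kubeconfig context points at
+the Sidero management cluster):
+
+```bash
+# --watch keeps the listing open and prints each Server as it is created.
+kubectl get servers --watch
+```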
+Once the registration is complete, you should see the servers registered with `kubectl get servers`: + +```bash +$ kubectl get servers -o wide +NAME HOSTNAME ACCEPTED ALLOCATED CLEAN +00000000-0000-0000-0000-d05099d33360 192.168.1.201 false false false +``` + +## Accept the Servers + +Note in the output above that the newly registered servers are not `accepted`. +In order for a server to be eligible for consideration, it _must_ be marked as `accepted`. +Before a `Server` is accepted, no write action will be performed against it. +This default is for safety (don't accidentally delete something just because it +was plugged in) and security (make sure you know the machine before it is given +credentials to communicate). + +> Note: if you are running in a safe environment, you can configure Sidero to +> automatically accept new machines. + +For more information on server acceptance, see the [server docs](../../resource-configuration/servers/#server-acceptance). + +## Create ServerClasses + +By default, Sidero comes with a single ServerClass `any` which matches any +(accepted) server. +This is sufficient for this demo, but you may wish to have +more flexibility by defining your own ServerClasses. + +ServerClasses allow you to group machines which are sufficiently similar to +allow for unnamed allocation. +This is analogous to cloud providers using such classes as `m3.large` or +`c2.small`, but the names are free-form and only need to make sense to you. + +For more information on ServerClasses, see the [ServerClass +docs](../../resource-configuration/serverclasses/). + +## Hardware differences + +In baremetal systems, there are commonly certain small features and +configurations which are unique to the hardware. +In many cases, such small variations may not require special configurations, but +others do. + +If hardware-specific differences do mandate configuration changes, we need a way +to keep those changes local to the hardware specification so that at the higher +level, a Server is just a Server (or a server in a ServerClass is just a Server +like all the others in that Class). + +The most common variations seem to be the installation disk and the console +serial port. + +Some machines have NVMe drives, which show up as something like `/dev/nvme0n1`. +Others may be SATA or SCSI, which show up as something like `/dev/sda`. +Some machines use `/dev/ttyS0` for the serial console; others `/dev/ttyS1`. + +Configuration patches can be applied to either Servers or ServerClasses, and +those patches will be applied to the final machine configuration for those +nodes without having to know anything about those nodes at the allocation level. + +For examples of install disk patching, see the [Installation Disk +doc](../../resource-configuration/servers/#installation-disk). + +For more information about patching in general, see the [Patching +Guide](../../guides/patching). diff --git a/website/content/v0.6/Getting Started/index.md b/website/content/v0.6/Getting Started/index.md new file mode 100644 index 000000000..b44af08f0 --- /dev/null +++ b/website/content/v0.6/Getting Started/index.md @@ -0,0 +1,61 @@ +--- +description: "Overview" +weight: 1 +title: "Overview" +--- + +This tutorial will walk you through a complete Sidero setup and the formation, +scaling, and destruction of a workload cluster. + +To complete this tutorial, you will need a few things: + +- ISC DHCP server. + While any DHCP server will do, we will be presenting the + configuration syntax for ISC DHCP. 
+ This is the standard DHCP server available on most Linux distributions (NOT + dnsmasq) as well as on the Ubiquiti EdgeRouter line of products. +- Machine or Virtual Machine on which to run Sidero itself. + The requirements for this machine are very low, it can be x86 or arm64 + and it should have at least 4GB of RAM. +- Machines on which to run Kubernetes clusters. + These have the same minimum specifications as the Sidero machine. +- Workstation on which `talosctl`, `kubectl`, and `clusterctl` can be run. + +## Steps + +1. Prerequisite: CLI tools +1. Prerequisite: DHCP server +1. Prerequisite: Kubernetes +1. Install Sidero +1. Expose services +1. Import workload machines +1. Create a workload cluster +1. Scale the workload cluster +1. Destroy the workload cluster +1. Optional: Pivot management cluster + +## Useful Terms + +**ClusterAPI** or **CAPI** is the common system for managing Kubernetes clusters +in a declarative fashion. + +**Management Cluster** is the cluster on which Sidero itself runs. +It is generally a special-purpose Kubernetes cluster whose sole responsibility +is maintaining the CRD database of Sidero and providing the services necessary +to manage your workload Kubernetes clusters. + +**Sidero** is the ClusterAPI-powered system which manages baremetal +infrastructure for Kubernetes. + +**Talos** is the Kubernetes-focused Linux operating system built by the same +people who bring to you Sidero. +It is a very small, entirely API-driven OS which is meant to provide a reliable +and self-maintaining base on which Kubernetes clusters may run. +More information about Talos can be found at +[https://talos.dev](https://talos.dev). + +**Workload Cluster** is a cluster, managed by Sidero, on which your Kubernetes +workloads may be run. +The workload clusters are where you run your own applications and infrastructure. +Sidero creates them from your available resources, maintains them over time as +your needs and resources change, and removes them whenever it is told to do so. diff --git a/website/content/v0.6/Getting Started/install-clusterapi.md b/website/content/v0.6/Getting Started/install-clusterapi.md new file mode 100644 index 000000000..0e3f06804 --- /dev/null +++ b/website/content/v0.6/Getting Started/install-clusterapi.md @@ -0,0 +1,69 @@ +--- +description: "Install Sidero" +weight: 5 +title: "Install Sidero" +--- + +Sidero is included as a default infrastructure provider in `clusterctl`, so the +installation of both Sidero and the Cluster API (CAPI) components is as simple +as using the `clusterctl` tool. + +> Note: Because Cluster API upgrades are _stateless_, it is important to keep all Sidero +> configuration for reuse during upgrades. + +Sidero has a number of configuration options which should be supplied at install +time, kept, and reused for upgrades. +These can also be specified in the `clusterctl` configuration file +(`$HOME/.cluster-api/clusterctl.yaml`). +You can reference the `clusterctl` +[docs](https://cluster-api.sigs.k8s.io/clusterctl/configuration.html#clusterctl-configuration-file) +for more information on this. + +For our purposes, we will use environment variables for our configuration +options. 
+ +```bash +export SIDERO_CONTROLLER_MANAGER_HOST_NETWORK=true +export SIDERO_CONTROLLER_MANAGER_DEPLOYMENT_STRATEGY=Recreate +export SIDERO_CONTROLLER_MANAGER_API_ENDPOINT=192.168.1.150 +export SIDERO_CONTROLLER_MANAGER_SIDEROLINK_ENDPOINT=192.168.1.150 + +clusterctl init -b talos -c talos -i sidero +``` + +First, we are telling Sidero to use `hostNetwork: true` so that it binds its +ports directly to the host, rather than being available only from inside the +cluster. +There are many ways of exposing the services, but this is the simplest +path for the single-node management cluster. +When you scale the management cluster, you will need to use an alternative +method, such as an external load balancer or something like +[MetalLB](https://metallb.universe.tf). + +The `192.168.1.150` IP address is the IP address or DNS hostname as seen from the workload +clusters. +In our case, this should be the main IP address of your Docker +workstation. + +> Note: If you encounter the following error, this is caused by a rename of our GitHub org from `talos-systems` to `siderolabs`. + +```bash +$ clusterctl init -b talos -c talos -i sidero +Fetching providers +Error: failed to get provider components for the "talos" provider: target namespace can't be defaulted. Please specify a target namespace +``` + +> This can be worked around by adding the following to `~/.cluster-api/clusterctl.yaml` and rerunning the init command: + +```yaml +providers: + - name: "talos" + url: "https://github.com/siderolabs/cluster-api-bootstrap-provider-talos/releases/latest/bootstrap-components.yaml" + type: "BootstrapProvider" + - name: "talos" + url: "https://github.com/siderolabs/cluster-api-control-plane-provider-talos/releases/latest/control-plane-components.yaml" + type: "ControlPlaneProvider" + - name: "sidero" + url: "https://github.com/siderolabs/sidero/releases/latest/infrastructure-components.yaml" + type: "InfrastructureProvider" +``` diff --git a/website/content/v0.6/Getting Started/pivot.md b/website/content/v0.6/Getting Started/pivot.md new file mode 100644 index 000000000..a609667fe --- /dev/null +++ b/website/content/v0.6/Getting Started/pivot.md @@ -0,0 +1,44 @@ +--- +description: "A guide for bootstrapping Sidero management plane" +weight: 11 +title: "Optional: Pivot management cluster" +--- + +Having the Sidero cluster running inside a Docker container is not the most +robust place for it, but it did make for an expedient start. + +Conveniently, you can create a Kubernetes cluster in Sidero and then _pivot_ the +management plane over to it. + +Start by creating a workload cluster as you have already done. +In this example, this new cluster is called `management`. + +After the new cluster is available, install Sidero onto it as we did before, +making sure to set all the environment variables or configuration parameters for +the _new_ management cluster first. 
+```bash
+export SIDERO_CONTROLLER_MANAGER_API_ENDPOINT=sidero.mydomain.com
+export SIDERO_CONTROLLER_MANAGER_SIDEROLINK_ENDPOINT=sidero.mydomain.com
+
+clusterctl init \
+  --kubeconfig-context=management \
+  -i sidero -b talos -c talos
+```
+
+Now, you can move the database from `sidero-demo` to `management`:
+
+```bash
+clusterctl move \
+  --kubeconfig-context=sidero-demo \
+  --to-kubeconfig-context=management
+```
+
+## Delete the old Docker Management Cluster
+
+If you created your `sidero-demo` cluster using Docker as described in this
+tutorial, you can now remove it:
+
+```bash
+talosctl cluster destroy --name sidero-demo
+```
diff --git a/website/content/v0.6/Getting Started/prereq-cli-tools.md
new file mode 100644
index 000000000..63c0e87b4
--- /dev/null
+++ b/website/content/v0.6/Getting Started/prereq-cli-tools.md
@@ -0,0 +1,60 @@
+---
+description: "Prerequisite: CLI tools"
+weight: 2
+title: "Prerequisite: CLI tools"
+---
+
+You will need three CLI tools installed on your workstation in order to interact
+with Sidero:
+
+- `kubectl`
+- `clusterctl`
+- `talosctl`
+
+## Install `kubectl`
+
+Since `kubectl` is the standard Kubernetes control tool, many distributions
+already exist for it.
+Feel free to check your own package manager to see if it is available natively.
+
+Otherwise, you may install it directly from the main distribution point.
+The main article for this can be found
+[here](https://kubernetes.io/docs/tasks/tools/#kubectl).
+
+```bash
+sudo curl -Lo /usr/local/bin/kubectl \
+    "https://dl.k8s.io/release/$(\
+    curl -L -s https://dl.k8s.io/release/stable.txt\
+    )/bin/linux/amd64/kubectl"
+sudo chmod +x /usr/local/bin/kubectl
+```
+
+## Install `clusterctl`
+
+The `clusterctl` tool is the standard control tool for ClusterAPI (CAPI).
+It is less common, so it is also less likely to be in package managers.
+
+The main article for installing `clusterctl` can be found
+[here](https://cluster-api.sigs.k8s.io/user/quick-start.html#install-clusterctl).
+
+```bash
+sudo curl -Lo /usr/local/bin/clusterctl \
+    "https://github.com/kubernetes-sigs/cluster-api/releases/download/v1.1.1/clusterctl-$(uname -s | tr '[:upper:]' '[:lower:]')-amd64"
+sudo chmod +x /usr/local/bin/clusterctl
+```
+
+> Note: This version of Sidero is only compatible with CAPI v1beta1,
+> so please install the latest version of `clusterctl` v1.x.
+
+## Install `talosctl`
+
+The `talosctl` tool is used to interact with the Talos (our Kubernetes-focused
+operating system) API.
+The latest version can be found on our
+[Releases](https://github.com/talos-systems/talos/releases) page.
+
+```bash
+sudo curl -Lo /usr/local/bin/talosctl \
+    "https://github.com/talos-systems/talos/releases/latest/download/talosctl-$(uname -s | tr '[:upper:]' '[:lower:]')-amd64"
+sudo chmod +x /usr/local/bin/talosctl
+```
diff --git a/website/content/v0.6/Getting Started/prereq-dhcp.md
new file mode 100644
index 000000000..9027c1d25
--- /dev/null
+++ b/website/content/v0.6/Getting Started/prereq-dhcp.md
@@ -0,0 +1,145 @@
+---
+description: "Prerequisite: DHCP Service"
+weight: 4
+title: "Prerequisite: DHCP service"
+---
+
+In order to network boot Talos, we need to set up our DHCP server to supply the
+network boot parameters to our servers.
+For maximum flexibility, Sidero makes use of iPXE to be able to reference
+artifacts via HTTP.
+Some modern servers support direct UEFI HTTP boot, but most existing servers +still rely on the old, slow TFTP-based PXE boot first. +Therefore, we need to tell our DHCP server to find the iPXE binary on a TFTP +server. + +Conveniently, Sidero comes with a TFTP server which will serve the appropriate +files. +We need only set up our DHCP server to point to it. + +The tricky bit is that at different phases, we need to serve different assets, +but they all use the same DHCP metadata key. + +In fact, we have as many as six different client types: + +- Legacy BIOS-based PXE boot (undionly.kpxe via TFTP) +- UEFI-based PXE boot (ipxe.efi via TFTP) +- UEFI HTTP boot (ipxe.efi via HTTP URL) +- iPXE (boot.ipxe via HTTP URL) +- UEFI-based PXE arm64 boot (ipxe-arm64.efi via TFTP) +- UEFI HTTP boot on arm64 (ipxe-arm64.efi via HTTP URL) + +## Common client types + +If you are lucky and all of the machines in a given DHCP zone can use the same +network boot client mechanism, your DHCP server only needs to provide two +options: + +- `Server-Name` (option 66) with the IP of the Sidero TFTP service +- `Bootfile-Name` (option 67) with the appropriate value for the boot client type: + - Legacy BIOS PXE boot: `undionly.kpxe` + - UEFI-based PXE boot: `ipxe.efi` + - UEFI HTTP boot: `http://sidero-server-url/tftp/ipxe.efi` + - iPXE boot: `http://sidero-server-url/boot.ipxe` + - arm64 UEFI PXE boot: `ipxe-arm64.efi` + - arm64 UEFI HTTP boot: `http://sidero-server-url/tftp/ipxe-arm64.efi` + +In the ISC DHCP server, these options look like: + +```text +next-server 172.16.199.50; +filename "ipxe.efi"; +``` + +## Multiple client types + +Any given server will usually use only one of those, but if you have a mix of +machines, you may need a combination of them. +In this case, you would need a way to provide different images for different +client or machine types. + +Both ISC DHCP server and dnsmasq provide ways to supply such conditional responses. +In this tutorial, we are working with ISC DHCP. + +For modularity, we are breaking the conditional statements into a separate file +and using the `include` statement to load them into the main `dhcpd.conf` file. + +In our example below, `172.16.199.50` is the IP address of our Sidero service. + +`ipxe-metal.conf`: + +```text +allow bootp; +allow booting; + +# IP address for PXE-based TFTP methods +next-server 172.16.199.50; + +# Configuration for iPXE clients +class "ipxeclient" { + match if exists user-class and (option user-class = "iPXE"); + filename "http://172.16.199.50/boot.ipxe"; +} + +# Configuration for legacy BIOS-based PXE boot +class "biosclients" { + match if not exists user-class and substring (option vendor-class-identifier, 15, 5) = "00000"; + filename "undionly.kpxe"; +} + +# Configuration for UEFI-based PXE boot +class "pxeclients" { + match if not exists user-class and substring (option vendor-class-identifier, 0, 9) = "PXEClient"; + filename "ipxe.efi"; +} + +# Configuration for UEFI-based HTTP boot +class "httpclients" { + match if not exists user-class and substring (option vendor-class-identifier, 0, 10) = "HTTPClient"; + option vendor-class-identifier "HTTPClient"; + filename "http://172.16.199.50/tftp/ipxe.efi"; +} +``` + +Once this file is created, we can include it from our main `dhcpd.conf` inside a +`subnet` section. 
+ +```text +shared-network sidero { + subnet 172.16.199.0 netmask 255.255.255.0 { + option domain-name-servers 8.8.8.8, 1.1.1.1; + option routers 172.16.199.1; + include "/etc/dhcp/ipxe-metal.conf"; + } +} +``` + +Since we use a number of Ubiquiti EdgeRouter devices especially in our home test +networks, it is worth mentioning the curious syntax gymnastics we must go +through there. +Essentially, the quotes around the path need to be entered as HTML entities: +`"`. + +Ubiquiti EdgeRouter configuration statement: + +```text +set service dhcp-server shared-network-name sidero \ + subnet 172.16.199.1 \ + subnet-parameters "include "/etc/dhcp/ipxe-metal.conf";" +``` + +Also note the fact that there are two semicolons at the end of the line. +The first is part of the HTML-encoded **"** (`"`) and the second is the actual terminating semicolon. + +## Troubleshooting + +Getting the netboot environment is tricky and debugging it is difficult. +Once running, it will generally stay running; +the problem is nearly always one of a missing or incorrect configuration, since +the process involves several different components. + +We are working toward integrating as much as possible into Sidero, to provide as +much intelligence and automation as can be had, but until then, you will likely +need to figure out how to begin hunting down problems. + +See the Sidero [Troubleshooting](../troubleshooting) guide for more assistance. diff --git a/website/content/v0.6/Getting Started/prereq-kubernetes.md b/website/content/v0.6/Getting Started/prereq-kubernetes.md new file mode 100644 index 000000000..c923829d9 --- /dev/null +++ b/website/content/v0.6/Getting Started/prereq-kubernetes.md @@ -0,0 +1,87 @@ +--- +description: "Prerequisite: Kubernetes" +weight: 3 +title: "Prerequisite: Kubernetes" +--- + +In order to run Sidero, you first need a Kubernetes "cluster". +There is nothing special about this cluster. +It can be, for example: + +- a Kubernetes cluster you already have +- a single-node cluster running in Docker on your laptop +- a cluster running inside a virtual machine stack such as VMWare +- a Talos Kubernetes cluster running on a spare machine + +Two important things are needed in this cluster: + +- Kubernetes `v1.19` or later +- Ability to expose TCP and UDP Services to the workload cluster machines + +For the purposes of this tutorial, we will create this cluster in Docker on a +workstation, perhaps a laptop. + +If you already have a suitable Kubernetes cluster, feel free to skip this step. + +## Create a Local Management Cluster + +The `talosctl` CLI tool has built-in support for spinning up Talos in docker containers. +Let's use this to our advantage as an easy Kubernetes cluster to start from. + +Issue the following to create a single-node Docker-based Kubernetes cluster: + +```bash +export HOST_IP="192.168.1.150" + +talosctl cluster create \ + --name sidero-demo \ + -p 69:69/udp,8081:8081/tcp,51821:51821/udp \ + --workers 0 \ + --config-patch '[{"op": "add", "path": "/cluster/allowSchedulingOnMasters", "value": true}]' \ + --endpoint $HOST_IP +``` + +The `192.168.1.150` IP address should be changed to the IP address of your Docker +host. +This is _not_ the Docker bridge IP but the standard IP address of the +workstation. + +Note that there are three ports mentioned in the command above. +The first (69) is +for TFTP. +The second (8081) is for the web server (which serves netboot +artifacts and configuration). +The third (51821) is for the SideroLink Wireguard network. 
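+Once the cluster is up, a quick way to confirm that Docker actually published
+these ports is to inspect the container directly (a sanity check only; this
+assumes the `sidero-demo` cluster name used above):
+
+```bash
+# The Ports column should show 69/udp, 8081/tcp, and 51821/udp mapped through.
+docker ps --filter "name=sidero-demo" --format "table {{.Names}}\t{{.Ports}}"
+```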
+ +Exposing them here allows us to access the services that will get deployed on this node. +In turn, we will be running our Sidero services with `hostNetwork: true`, +so the Docker host will forward these to the Docker container, +which will in turn be running in the same namespace as the Sidero Kubernetes components. +A full separate management cluster will likely approach this differently, +with a load balancer or a means of sharing an IP address across multiple nodes (such as with MetalLB). + +Finally, the `--config-patch` is optional, +but since we are running a single-node cluster in this Tutorial, +adding this will allow Sidero to run on the controlplane. +Otherwise, you would need to add worker nodes to this management plane cluster to be +able to run the Sidero components on it. + +## Access the cluster + +Once the cluster create command is complete, you can retrieve the kubeconfig for it using the Talos API: + +```bash +talosctl kubeconfig +``` + +> Note: by default, Talos will merge the kubeconfig for this cluster into your +> standard kubeconfig under the context name matching the cluster name your +> created above. +> If this name conflicts, it will be given a `-1`, a `-2` or so +> on, so it is generally safe to run. +> However, if you would prefer to not modify your standard kubeconfig, you can +> supply a directory name as the third parameter, which will cause a new +> kubeconfig to be created there instead. +> Remember that if you choose to not use the standard location, your should set +> your `KUBECONFIG` environment variable or pass the `--kubeconfig` option to +> tell the `kubectl` client the name of the `kubeconfig` file. diff --git a/website/content/v0.6/Getting Started/scale-workload.md b/website/content/v0.6/Getting Started/scale-workload.md new file mode 100644 index 000000000..06b1562d2 --- /dev/null +++ b/website/content/v0.6/Getting Started/scale-workload.md @@ -0,0 +1,14 @@ +--- +description: "A guide for bootstrapping Sidero management plane" +weight: 9 +title: "Scale the Workload Cluster" +--- + +If you have more machines available, you can scale both the controlplane +(`TalosControlPlane`) and the workers (`MachineDeployment`) for any cluster +after it has been deployed. +This is done just like normal Kubernetes `Deployments`. + +```bash +kubectl scale taloscontrolplane cluster-0-cp --replicas=3 +``` diff --git a/website/content/v0.6/Getting Started/troubleshooting.md b/website/content/v0.6/Getting Started/troubleshooting.md new file mode 100644 index 000000000..7e446c605 --- /dev/null +++ b/website/content/v0.6/Getting Started/troubleshooting.md @@ -0,0 +1,77 @@ +--- +description: "Troubleshooting" +weight: 99 +title: "Troubleshooting" +--- + +The first thing to do in troubleshooting problems with the Sidero installation +and operation is to figure out _where_ in the process that failure is occurring. + +Keep in mind the general flow of the pieces. +For instance: + +1. A server is configured by its BIOS/CMOS to attempt a network boot using the PXE firmware on +its network card(s). +1. That firmware requests network and PXE boot configuration via DHCP. +1. DHCP points the firmware to the Sidero TFTP or HTTP server (depending on the firmware type). +1. The second stage boot, iPXE, is loaded and makes an HTTP request to the + Sidero metadata server for its configuration, which contains the URLs for + the kernel and initrd images. +1. 
The kernel and initrd images are downloaded by iPXE and boot into the Sidero + agent software (if the machine is not yet known and assigned by Sidero). +1. The agent software reports to the Sidero metadata server via HTTP the hardware information of the machine. +1. A (usually human or external API) operator verifies and accepts the new + machine into Sidero. +1. The agent software reboots and wipes the newly-accepted machine, then powers + off the machine to wait for allocation into a cluster. +1. The machine is allocated by Sidero into a Kubernetes Cluster. +1. Sidero tells the machine, via IPMI, to boot into the OS installer + (following all the same network boot steps above). +1. The machine downloads its configuration from the Sidero metadata server via + HTTP. +1. The machine applies its configuration, installs a bootloader, and reboots. +1. The machine, upon reboot from its local disk, joins the Kubernetes cluster + and continues until Sidero tells it to leave the cluster. +1. Sidero tells the machine to leave the cluster and reboots it into network + boot mode, via IPMI. +1. The machine netboots into wipe mode, wherein its disks are again wiped to + come back to the "clean" state. +1. The machine again shuts down and waits to be needed. + +## Device firmware (PXE boot) + +The worst place to fail is also, unfortunately, the most common. +This is the firmware phase, where the network card's built-in firmware attempts +to initiate the PXE boot process. +This is the worst place because the firmware is completely opaque, with very +little logging, and what logging _does_ appear frequently is wiped from the +console faster than you can read it. + +If you fail here, the problem will most likely be with your DHCP configuration, +though it _could_ also be in the Sidero TFTP service configuration. + +## Validate Sidero TFTP service + +The easiest to validate is to use a `tftp` client to validate that the Sidero +TFTP service is available at the IP you are advertising via DHCP. + +```bash + $ atftp 172.16.199.50 + tftp> get ipxe.efi +``` + +TFTP is an old, slow protocol with very little feedback or checking. +Your only real way of telling if this fails is by timeout. +Over a local network, this `get` command should take a few seconds. +If it takes longer than 30 seconds, it is probably not working. + +Success is also not usually indicated: +you just get a prompt returned, and the file should show up in your current +directory. + +If you are failing to connect to TFTP, the problem is most likely with your +Sidero Service exposure: +how are you exposing the TFTP service in your management cluster to the outside +world? +This normally involves either setting host networking on the Deployment or +installing and using something like MetalLB. 
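+
+## Validate Sidero HTTP service
+
+The HTTP side is easier to check, since a plain `curl` can exercise it.
+The following is a spot-check only, and assumes the example endpoint used
+earlier in this tutorial; substitute whatever you set in
+`SIDERO_CONTROLLER_MANAGER_API_ENDPOINT`:
+
+```bash
+# A 200 response means the server that hands out iPXE binaries, kernels,
+# initrds, and machine configuration is reachable.
+curl -I http://192.168.1.150:8081/tftp/ipxe.efi
+```
+
+If machines boot but fail somewhere later in the process, the Sidero controller
+logs are usually the next place to look (this assumes the default
+`sidero-system` namespace created by `clusterctl init`):
+
+```bash
+kubectl -n sidero-system logs deploy/sidero-controller-manager --all-containers --tail=100
+```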
diff --git a/website/content/v0.6/Guides/_index.md b/website/content/v0.6/Guides/_index.md new file mode 100644 index 000000000..fad8b41ed --- /dev/null +++ b/website/content/v0.6/Guides/_index.md @@ -0,0 +1,4 @@ +--- +title: "Guides" +weight: 40 +--- \ No newline at end of file diff --git a/website/content/v0.6/Guides/bootstrapping.md b/website/content/v0.6/Guides/bootstrapping.md new file mode 100644 index 000000000..bbb56fb3b --- /dev/null +++ b/website/content/v0.6/Guides/bootstrapping.md @@ -0,0 +1,284 @@ +--- +description: "A guide for bootstrapping Sidero management plane" +weight: 1 +title: "Bootstrapping" +--- + +## Introduction + +Imagine a scenario in which you have shown up to a datacenter with only a laptop and your task is to transition a rack of bare metal machines into an HA management plane and multiple Kubernetes clusters created by that management plane. +In this guide, we will go through how to create a bootstrap cluster using a Docker-based Talos cluster, provision the management plane, and pivot over to it. +Guides around post-pivoting setup and subsequent cluster creation should also be found in the "Guides" section of the sidebar. + +Because of the design of Cluster API, there is inherently a "chicken and egg" problem with needing a Kubernetes cluster in order to provision the management plane. +Talos Systems and the Cluster API community have created tools to help make this transition easier. + +## Prerequisites + +First, you need to install the latest `talosctl` by running the following script: + +```bash +curl -Lo /usr/local/bin/talosctl https://github.com/talos-systems/talos/releases/latest/download/talosctl-$(uname -s | tr "[:upper:]" "[:lower:]")-amd64 +chmod +x /usr/local/bin/talosctl +``` + +You can read more about Talos and `talosctl` at [talos.dev](https://www.talos.dev/docs/latest). + +Next, there are two big prerequisites involved with bootstrapping Sidero: routing and DHCP setup. + +From the routing side, the laptop from which you are bootstrapping _must_ be accessible by the bare metal machines that we will be booting. +In the datacenter scenario described above, the easiest way to achieve this is probably to hook the laptop onto the server rack's subnet by plugging it into the top-of-rack switch. +This is needed for TFTP, PXE booting, and for the ability to register machines with the bootstrap plane. + +DHCP configuration is needed to tell the metal servers what their "next server" is when PXE booting. +The configuration of this is different for each environment and each DHCP server, thus it's impossible to give an easy guide. +However, here is an example of the configuration for an Ubiquti EdgeRouter that uses vyatta-dhcpd as the DHCP service: + +This block shows the subnet setup, as well as the extra "subnet-parameters" that tell the DHCP server to include the ipxe-metal.conf file. + +> These commands are run under the `configure` option in EdgeRouter + +```bash +$ show service dhcp-server shared-network-name MetalDHCP + + authoritative enable + subnet 192.168.254.0/24 { + default-router 192.168.254.1 + dns-server 192.168.1.200 + lease 86400 + start 192.168.254.2 { + stop 192.168.254.252 + } + subnet-parameters "include "/etc/dhcp/ipxe-metal.conf";" + } +``` + +Here is the `ipxe-metal.conf` file. 
+ +```bash +$ cat /etc/dhcp/ipxe-metal.conf + +allow bootp; +allow booting; + +next-server 192.168.1.150; +filename "ipxe.efi"; # use "undionly.kpxe" for BIOS netboot or "ipxe.efi" for UEFI netboot + +host talos-mgmt-0 { + fixed-address 192.168.254.2; + hardware ethernet d0:50:99:d3:33:60; +} +``` + +> If you want to boot multiple architectures, you can use the *DHCP Option 93* to specify the architecture. + +First we need to define *option 93* in the DHCP server configuration. + +```bash +set service dhcp-server global-parameters "option system-arch code 93 = unsigned integer 16;" +``` + +Now we can specify condition based on *option 93* in `ipxe-metal.conf` file + +```bash +$ cat /etc/dhcp/ipxe-metal.conf + +allow bootp; +allow booting; + +next-server 192.168.1.150; + +if option system-arch = 00:0b { + filename "ipxe-arm64.efi"; +} else { + filename "ipxe.efi"; +} + +host talos-mgmt-0 { + fixed-address 192.168.254.2; + hardware ethernet d0:50:99:d3:33:60; +} +``` + +Notice that it sets a static address for the management node that I'll be booting, in addition to providing the "next server" info. +This "next server" IP address will match references to `PUBLIC_IP` found below in this guide. + +## Create a Local Cluster + +The `talosctl` CLI tool has built-in support for spinning up Talos in docker containers. +Let's use this to our advantage as an easy Kubernetes cluster to start from. + +Set an environment variable called `PUBLIC_IP` which is the "public" IP of your machine. +Note that "public" is a bit of a misnomer. +We're really looking for the IP of your machine, not the IP of the node on the docker bridge (ex: `192.168.1.150`). + +```bash +export PUBLIC_IP="192.168.1.150" +``` + +We can now create our Docker cluster. +Issue the following to create a single-node cluster: + +```bash +talosctl cluster create \ + --kubernetes-version 1.22.2 \ + -p 69:69/udp,8081:8081/tcp,51821:51821/udp \ + --workers 0 \ + --endpoint $PUBLIC_IP +``` + +Note that there are several ports mentioned in the command above. +These allow us to access the services that will get deployed on this node. + +Once the cluster create command is complete, issue `talosctl kubeconfig /desired/path` to fetch the kubeconfig for this cluster. +You should then set your `KUBECONFIG` environment variable to the path of this file. + +## Untaint Control Plane + +Because this is a single node cluster, we need to remove the "NoSchedule" taint on the node to make sure non-controlplane components can be scheduled. + +```bash +kubectl taint node talos-default-master-1 node-role.kubernetes.io/master:NoSchedule- +``` + +## Install Sidero + +To install Sidero and the other Talos providers, simply issue: + +```bash +SIDERO_CONTROLLER_MANAGER_HOST_NETWORK=true \ + SIDERO_CONTROLLER_MANAGER_DEPLOYMENT_STRATEGY=Recreate \ + SIDERO_CONTROLLER_MANAGER_API_ENDPOINT=$PUBLIC_IP \ + clusterctl init -b talos -c talos -i sidero +``` + +We will now want to ensure that the Sidero services that got created are publicly accessible across our subnet. +These variables above will allow the metal machines to speak to these services later. + +## Register the Servers + +At this point, any servers on the same network as Sidero should PXE boot using the Sidero PXE service. +To register a server with Sidero, simply turn it on and Sidero will do the rest. 
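+If your servers have BMCs, you do not even need to press the power button;
+`ipmitool` can do it remotely (a sketch only — the address and credentials here
+match the example IPMI settings shown below and should be replaced with your
+own):
+
+```bash
+# Power on the machine so it PXE boots into the Sidero agent environment.
+ipmitool -I lanplus -H 192.168.88.9 -U ADMIN -P ADMIN chassis power on
+```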
+Once the registration is complete, you should see the servers registered with `kubectl get servers`: + +```bash +$ kubectl get servers -o wide +NAME HOSTNAME ACCEPTED ALLOCATED CLEAN +00000000-0000-0000-0000-d05099d33360 192.168.254.2 false false false +``` + +## Setting up IPMI + +Sidero can use IPMI information to control Server power state, reboot servers and set boot order. +IPMI information will be, by default, setup automatically if possible as part of the acceptance process. +See [IPMI](../../resource-configuration/servers/#ipmi) for more information. + +IMPI connection information can also be set manually in the Server spec after initial registration: + +```bash +kubectl patch server 00000000-0000-0000-0000-d05099d33360 --type='json' -p='[{"op": "add", "path": "/spec/bmc", "value": {"endpoint": "192.168.88.9", "user": "ADMIN", "pass":"ADMIN"}}]' +``` + +If IPMI info is not set, servers should be configured to boot first from network, then from disk. + +## Configuring the installation disk + +Note that for bare-metal setup, you would need to specify an installation disk. +See [Installation Disk](../../resource-configuration/servers/#installation-disk) for details on how to do this. +You should configure this before accepting the server. + +## Accept the Servers + +Note in the output above that the newly registered servers are not `accepted`. +In order for a server to be eligible for consideration, it _must_ be marked as `accepted`. +Before a `Server` is accepted, no write action will be performed against it. +Servers can be accepted by issuing a patch command like: + +```bash +kubectl patch server 00000000-0000-0000-0000-d05099d33360 --type='json' -p='[{"op": "replace", "path": "/spec/accepted", "value": true}]' +``` + +For more information on server acceptance, see the [server docs](../../resource-configuration/servers). + +## Create Management Plane + +We are now ready to template out our management plane. +Using clusterctl, we can create a cluster manifest with: + +```bash +clusterctl generate cluster management-plane -i sidero > management-plane.yaml +``` + +Note that there are several variables that should be set in order for the templating to work properly: + +- `CONTROL_PLANE_ENDPOINT` and `CONTROL_PLANE_PORT`: The endpoint (IP address or hostname) and the port used for the Kubernetes API server + (e.g. for `https://1.2.3.4:6443`: `CONTROL_PLANE_ENDPOINT=1.2.3.4` and `CONTROL_PLANE_PORT=6443`). + This is the equivalent of the `endpoint` you would specify in `talosctl gen config`. + There are a variety of ways to configure a control plane endpoint. + Some common ways for an HA setup are to use DNS, a load balancer, or BGP. + A simpler method is to use the IP of a single node. + This has the disadvantage of being a single point of failure, but it can be a simple way to get running. +- `CONTROL_PLANE_SERVERCLASS`: The server class to use for control plane nodes. +- `WORKER_SERVERCLASS`: The server class to use for worker nodes. +- `KUBERNETES_VERSION`: The version of Kubernetes to deploy (e.g. `v1.22.2`). +- `CONTROL_PLANE_PORT`: The port used for the Kubernetes API server (port 6443) +- `TALOS_VERSION`: This should correspond to the minor version of Talos that you will be deploying (e.g. `v0.13`). + This value is used in determining the fields present in the machine configuration that gets generated for Talos nodes. 
+ +For instance: + +```bash +export CONTROL_PLANE_SERVERCLASS=any +export WORKER_SERVERCLASS=any +export TALOS_VERSION=v0.13 +export KUBERNETES_VERSION=v1.22.2 +export CONTROL_PLANE_PORT=6443 +export CONTROL_PLANE_ENDPOINT=1.2.3.4 +clusterctl generate cluster management-plane -i sidero > management-plane.yaml +``` + +In addition, you can specify the replicas for control-plane & worker nodes in management-plane.yaml manifest for TalosControlPlane and MachineDeployment objects. +Also, they can be scaled if needed (after applying the `management-plane.yaml` manifest): + +```bash +kubectl get taloscontrolplane +kubectl get machinedeployment +kubectl scale taloscontrolplane management-plane-cp --replicas=3 +``` + +Now that we have the manifest, we can simply apply it: + +```bash +kubectl apply -f management-plane.yaml +``` + +**NOTE: The templated manifest above is meant to act as a starting point.** +**If customizations are needed to ensure proper setup of your Talos cluster, they should be added before applying.** + +Once the management plane is setup, you can fetch the talosconfig by using the cluster label. +Be sure to update the cluster name and issue the following command: + +```bash +kubectl get talosconfig \ + -l cluster.x-k8s.io/cluster-name= \ + -o yaml -o jsonpath='{.items[0].status.talosConfig}' > management-plane-talosconfig.yaml +``` + +With the talosconfig in hand, the management plane's kubeconfig can be fetched with `talosctl --talosconfig management-plane-talosconfig.yaml kubeconfig` + +## Pivoting + +Once we have the kubeconfig for the management cluster, we now have the ability to pivot the cluster from our bootstrap. +Using clusterctl, issue: + +```bash +clusterctl init --kubeconfig=/path/to/management-plane/kubeconfig -i sidero -b talos -c talos +``` + +Followed by: + +```bash +clusterctl move --to-kubeconfig=/path/to/management-plane/kubeconfig +``` + +Upon completion of this command, we can now tear down our bootstrap cluster with `talosctl cluster destroy` and begin using our management plane as our point of creation for all future clusters! diff --git a/website/content/v0.6/Guides/decommissioning.md b/website/content/v0.6/Guides/decommissioning.md new file mode 100644 index 000000000..aeb103914 --- /dev/null +++ b/website/content/v0.6/Guides/decommissioning.md @@ -0,0 +1,26 @@ +--- +description: "A guide for decommissioning servers" +weight: 1 +title: "Decommissioning Servers" +--- + +This guide will detail the process for removing a server from Sidero. +The process is fairly simple with a few pieces of information. + +- For the given server, take note of any serverclasses that are configured to match the server. + +- Take note of any clusters that make use of aforementioned serverclasses. + +- For each matching cluster, edit the cluster resource with `kubectl edit cluster` and set `.spec.paused` to `true`. + Doing this ensures that no new machines will get created for these servers during the decommissioning process. + +- If you want to mark a server to be not allocated after it's accepted into the cluster, set the `.spec.cordoned` field to `true`. + This will prevent the server from being allocated to any new clusters (still allowing it to be wiped). + +- If the server is already part of a cluster (`kubectl get serverbindings -o wide` should provide this info), you can now delete the machine that corresponds with this server via `kubectl delete machine `. + +- With the machine deleted, Sidero will reboot the machine and wipe its disks. 
+ +- Once the disk wiping is complete and the server is turned off, you can finally delete the server from Sidero with `kubectl delete server ` and repurpose the server for something else. + +- Finally, unpause any clusters that were edited in step 3 by setting `.spec.paused` to `false`. diff --git a/website/content/v0.6/Guides/first-cluster.md b/website/content/v0.6/Guides/first-cluster.md new file mode 100644 index 000000000..bb5aeb73d --- /dev/null +++ b/website/content/v0.6/Guides/first-cluster.md @@ -0,0 +1,150 @@ +--- +description: "A guide for creating your first cluster with the Sidero management plane" +weight: 2 +title: "Creating Your First Cluster" +--- + +## Introduction + +This guide will detail the steps needed to provision your first bare metal Talos cluster after completing the bootstrap and pivot steps detailed in the previous guide. +There will be two main steps in this guide: reconfiguring the Sidero components now that they have been pivoted and the actual cluster creation. + +## Reconfigure Sidero + +### Patch Services + +In this guide, we will convert the services to use host networking. +This is also necessary because some protocols like TFTP don't allow for port configuration. +Along with some nodeSelectors and a scale up of the metal controller manager deployment, creating the services this way allows for the creation of DNS names that point to all management plane nodes and provide an HA experience if desired. +It should also be noted, however, that there are many options for achieving this functionality. +Users can look into projects like MetalLB or KubeRouter with BGP and ECMP if they desire something else. + +Metal Controller Manager: + +```bash +## Use host networking +kubectl patch deploy -n sidero-system sidero-controller-manager --type='json' -p='[{"op": "add", "path": "/spec/template/spec/hostNetwork", "value": true}]' +``` + +#### Update Environment + + + +Sidero by default appends `talos.config` kernel argument with based on the flags `--api-endpoint` and `--api-port` to the `sidero-controller-manager`: +`talos.config=http://$API_ENDPOINT:$API_PORT/configdata?uuid=`. + + + +If this default value doesn't apply, edit the environment with `kubectl edit environment default` and add the `talos.config` kernel arg with the IP of one of the management plane nodes (or the DNS entry you created). + +### Update DHCP + +The DHCP options configured in the previous guide should now be updated to point to your new management plane IP or to the DNS name if it was created. 
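+Before repointing DHCP, it can save a failed boot loop to confirm that the
+relocated, host-networked Sidero HTTP service answers on the management plane
+address (this check assumes the example `192.168.254.2` address used in this
+guide; substitute your own IP or DNS name):
+
+```bash
+# The response should be a small iPXE script beginning with "#!ipxe".
+curl http://192.168.254.2:8081/boot.ipxe
+```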
+ +A revised ipxe-metal.conf file looks like: + +```bash +allow bootp; +allow booting; + +next-server 192.168.254.2; +if exists user-class and option user-class = "iPXE" { + filename "http://192.168.254.2:8081/boot.ipxe"; +} else { + if substring (option vendor-class-identifier, 15, 5) = "00000" { + # BIOS + if substring (option vendor-class-identifier, 0, 10) = "HTTPClient" { + option vendor-class-identifier "HTTPClient"; + filename "http://192.168.254.2:8081/tftp/undionly.kpxe"; + } else { + filename "undionly.kpxe"; + } + } else { + # UEFI + if substring (option vendor-class-identifier, 0, 10) = "HTTPClient" { + option vendor-class-identifier "HTTPClient"; + filename "http://192.168.254.2:8081/tftp/ipxe.efi"; + } else { + filename "ipxe.efi"; + } + } +} + +host talos-mgmt-0 { + fixed-address 192.168.254.2; + hardware ethernet d0:50:99:d3:33:60; +} +``` + +There are multiple ways to boot the via iPXE: + +- if the node has built-in iPXE, direct URL to the iPXE script can be used: `http://192.168.254.2:8081/boot.ipxe`. +- depending on the boot mode (BIOS or UEFI), either `ipxe.efi` or `undionly.kpxe` can be used (these images contain embedded iPXE scripts). +- iPXE binaries can be delivered either over TFTP or HTTP (HTTP support depends on node firmware). + +## Register the Servers + +At this point, any servers on the same network as Sidero should PXE boot using the Sidero PXE service. +To register a server with Sidero, simply turn it on and Sidero will do the rest. +Once the registration is complete, you should see the servers registered with `kubectl get servers`: + +```bash +$ kubectl get servers -o wide +NAME HOSTNAME ACCEPTED ALLOCATED CLEAN +00000000-0000-0000-0000-d05099d33360 192.168.254.2 false false false +``` + +## Accept the Servers + +Note in the output above that the newly registered servers are not `accepted`. +In order for a server to be eligible for consideration, it _must_ be marked as `accepted`. +Before a `Server` is accepted, no write action will be performed against it. +Servers can be accepted by issuing a patch command like: + +```bash +kubectl patch server 00000000-0000-0000-0000-d05099d33360 --type='json' -p='[{"op": "replace", "path": "/spec/accepted", "value": true}]' +``` + +For more information on server acceptance, see the [server docs](../../resource-configuration/servers). + +## Create the Cluster + +The cluster creation process should be identical to what was detailed in the previous guide. +Using clusterctl, we can create a cluster manifest with: + +```bash +clusterctl generate cluster workload-cluster -i sidero > workload-cluster.yaml +``` + +Note that there are several variables that should be set in order for the templating to work properly: + +- `CONTROL_PLANE_ENDPOINT` and `CONTROL_PLANE_PORT`: The endpoint (IP address or hostname) and the port used for the Kubernetes API server + (e.g. for `https://1.2.3.4:6443`: `CONTROL_PLANE_ENDPOINT=1.2.3.4` and `CONTROL_PLANE_PORT=6443`). + This is the equivalent of the `endpoint` you would specify in `talosctl gen config`. + There are a variety of ways to configure a control plane endpoint. + Some common ways for an HA setup are to use DNS, a load balancer, or BGP. + A simpler method is to use the IP of a single node. + This has the disadvantage of being a single point of failure, but it can be a simple way to get running. +- `CONTROL_PLANE_SERVERCLASS`: The server class to use for control plane nodes. +- `WORKER_SERVERCLASS`: The server class to use for worker nodes. 
+- `KUBERNETES_VERSION`: The version of Kubernetes to deploy (e.g. `v1.19.4`). +- `TALOS_VERSION`: This should correspond to the minor version of Talos that you will be deploying (e.g. `v0.10`). + This value is used in determining the fields present in the machine configuration that gets generated for Talos nodes. + Note that the default is currently `v0.13`. + +Now that we have the manifest, we can simply apply it: + +```bash +kubectl apply -f workload-cluster.yaml +``` + +**NOTE: The templated manifest above is meant to act as a starting point.** +**If customizations are needed to ensure proper setup of your Talos cluster, they should be added before applying.** + +Once the workload cluster is setup, you can fetch the talosconfig with a command like: + +```bash +kubectl get talosconfig -o yaml workload-cluster-cp-xxx -o jsonpath='{.status.talosConfig}' > workload-cluster-talosconfig.yaml +``` + +Then the workload cluster's kubeconfig can be fetched with `talosctl --talosconfig workload-cluster-talosconfig.yaml kubeconfig /desired/path`. diff --git a/website/content/v0.6/Guides/flow.md b/website/content/v0.6/Guides/flow.md new file mode 100644 index 000000000..0ed97273c --- /dev/null +++ b/website/content/v0.6/Guides/flow.md @@ -0,0 +1,81 @@ +--- +description: "Diagrams for various flows in Sidero." +weight: 4 +title: "Provisioning Flow" +--- + +```mermaid +graph TD; + Start(Start); + End(End); + + %% Decisions + + IsOn{Is server is powered on?}; + IsRegistered{Is server is registered?}; + IsAccepted{Is server is accepted?}; + IsClean{Is server is clean?}; + IsAllocated{Is server is allocated?}; + + %% Actions + + DoPowerOn[Power server on]; + DoPowerOff[Power server off]; + DoBootAgentEnvironment[Boot agent]; + DoBootEnvironment[Boot environment]; + DoRegister[Register server]; + DoWipe[Wipe server]; + + %% Chart + + Start-->IsOn; + IsOn--Yes-->End; + IsOn--No-->DoPowerOn; + + DoPowerOn--->IsRegistered; + + IsRegistered--Yes--->IsAccepted; + IsRegistered--No--->DoBootAgentEnvironment-->DoRegister; + + DoRegister-->IsRegistered; + + IsAccepted--Yes--->IsAllocated; + IsAccepted--No--->End; + + IsAllocated--Yes--->DoBootEnvironment; + IsAllocated--No--->IsClean; + IsClean--No--->DoWipe-->DoPowerOff; + + IsClean--Yes--->DoPowerOff; + + DoBootEnvironment-->End; + + DoPowerOff-->End; +``` + +## Installation Flow + +```mermaid +graph TD; + Start(Start); + End(End); + + %% Decisions + + IsInstalled{Is installed}; + + %% Actions + + DoInstall[Install]; + DoReboot[Reboot]; + + %% Chart + + Start-->IsInstalled; + IsInstalled--Yes-->End; + IsInstalled--No-->DoInstall; + + DoInstall-->DoReboot; + + DoReboot-->IsInstalled; +``` diff --git a/website/content/v0.6/Guides/iso.md b/website/content/v0.6/Guides/iso.md new file mode 100644 index 000000000..1d666a70e --- /dev/null +++ b/website/content/v0.6/Guides/iso.md @@ -0,0 +1,23 @@ +--- +description: "A guide for bootstrapping Sidero management plane using the ISO image" +weight: 1 +title: "Building A Management Plane with ISO Image" +--- + +This guide will provide some very basic detail about how you can also build a Sidero management plane using the Talos ISO image instead of following the Docker-based process that we detail in our Getting Started tutorials. + +Using the ISO is a perfectly valid way to build a Talos cluster, but this approach is not recommended for Sidero as it avoids the "pivot" step detailed [here](../../getting-started/pivot). 
+Skipping this step means that the management plane does not become "self-hosted", in that it cannot be upgraded and scaled using the Sidero processes we follow for workload clusters. +For folks who are willing to take care of their management plane in other ways, however, this approach will work fine. + +The rough outline of this process is very short and sweet, as it relies on other documentation: + +- For each management plane node, boot the ISO and install Talos using the "apply-config" process mentioned in our Talos [Getting Started](https://www.talos.dev/docs/v0.13/introduction/getting-started/) docs. + These docs go into heavy detail on using the ISO, so they will not be recreated here. + +- With a Kubernetes cluster now in hand (and with access to it via `talosctl` and `kubectl`), you can simply pickup the Getting Started tutorial at the "Install Sidero" section [here](../../getting-started/install-clusterapi). + Keep in mind, however, that you will be unable to do the "pivoting" section of the tutorial, so just skip that step when you reach the end of the tutorial. + +> Note: It may also be of interest to view the prerequisite guides on [CLI](../../getting-started/prereq-cli-tools) and [DHCP](../../getting-started/prereq-dhcp) setup, as they will still apply to this method. + +- For long-term maintenance of a management plane created in this way, refer to the Talos documentation for upgrading [Kubernetes](https://www.talos.dev/docs/v0.13/guides/upgrading-kubernetes/) and [Talos](https://www.talos.dev/docs/v0.13/guides/upgrading-talos/) itself. diff --git a/website/content/v0.6/Guides/patching.md b/website/content/v0.6/Guides/patching.md new file mode 100644 index 000000000..70609fed3 --- /dev/null +++ b/website/content/v0.6/Guides/patching.md @@ -0,0 +1,57 @@ +--- +description: "A guide describing patching" +weight: 3 +title: "Patching" +--- + +Server resources can be updated by using the `configPatches` section of the custom resource. +Any field of the [Talos machine config](https://www.talos.dev/docs/v0.13/reference/configuration/) +can be overridden on a per-machine basis using this method. +The format of these patches is based on [JSON 6902](http://jsonpatch.com/) that you may be used to in tools like kustomize. + +Any patches specified in the server resource are processed by the Sidero controller before it returns a Talos machine config for a given server at boot time. + +A set of patches may look like this: + +```yaml +apiVersion: metal.sidero.dev/v1alpha1 +kind: Server +metadata: + name: 00000000-0000-0000-0000-d05099d33360 +spec: + configPatches: + - op: replace + path: /machine/install + value: + disk: /dev/sda + - op: replace + path: /cluster/network/cni + value: + name: "custom" + urls: + - "http://192.168.1.199/assets/cilium.yaml" +``` + +## Testing Configuration Patches + +While developing config patches it is usually convenient to test generated config with patches +before actual server is provisioned with the config. + +This can be achieved by querying the metadata server endpoint directly: + +```sh +$ curl http://$PUBLIC_IP:8081/configdata?uuid=$SERVER_UUID +version: v1alpha1 +... +``` + +Replace `$PUBLIC_IP` with the Sidero IP address and `$SERVER_UUID` with the name of the `Server` to test +against. + +If metadata endpoint returns an error on applying JSON patches, make sure config subtree being patched exists in the config. +If it doesn't exist, create it with the `op: add` above the `op: replace` patch. 
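+
+As a purely illustrative sketch (the `extraKernelArgs` path below is only an example, not something your config necessarily needs), the combined patch could look like:
+
+```yaml
+apiVersion: metal.sidero.dev/v1alpha1
+kind: Server
+...
+spec:
+  configPatches:
+    # create the subtree first if the generated config doesn't contain it yet
+    - op: add
+      path: /machine/install/extraKernelArgs
+      value: []
+    # a replace against the same path will now succeed
+    - op: replace
+      path: /machine/install/extraKernelArgs
+      value:
+        - console=ttyS1,115200n8
+```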
+
+## Combining Patches from Multiple Sources
+
+Config patches might be combined from multiple sources (`Server`, `ServerClass`, `TalosControlPlane`, `TalosConfigTemplate`), which is explained in detail
+in the [Metadata](../../resource-configuration/metadata/) section.
diff --git a/website/content/v0.6/Guides/rpi4-as-servers.md b/website/content/v0.6/Guides/rpi4-as-servers.md
new file mode 100644
index 000000000..48113624c
--- /dev/null
+++ b/website/content/v0.6/Guides/rpi4-as-servers.md
@@ -0,0 +1,268 @@
+---
+description: "Using Raspberry Pi 4 as servers"
+weight: 6
+title: "Raspberry Pi 4 as Servers"
+---
+
+This guide explains how to use Sidero to manage Raspberry Pi 4s as servers.
+It goes hand in hand with the [bootstrapping guide](../../guides/bootstrapping).
+
+From the bootstrapping guide, reach "Install Sidero" and come back to this guide.
+Once you finish with this guide, you will need to go back to the bootstrapping guide and continue with "Register the servers".
+
+The rest of this guide assumes that you have a cluster set up with Sidero that is ready to accept servers.
+It explains the changes that need to be made to accept a Raspberry Pi 4 as a server.
+
+## RPi 4 boot process
+
+To boot Talos on the Pi 4 over the network, we need a two-step boot process.
+The Pi 4 has an EEPROM which contains code to boot up the Pi.
+This EEPROM expects a specific boot folder structure as explained on [this](https://www.raspberrypi.org/documentation/configuration/boot_folder.md) page.
+We will use the EEPROM to boot into UEFI, which we will then use to PXE and iPXE boot into Sidero and Talos.
+
+## Prerequisites
+
+### Update EEPROM
+
+_NOTE:_ If you've updated the EEPROM with the image that was referenced in [the Talos docs](https://www.talos.dev/docs/v0.13/single-board-computers/rpi_4/#updating-the-eeprom), you can either flash it with the one mentioned below, or visit [the EEPROM config docs](https://www.raspberrypi.org/documentation/hardware/raspberrypi/bcm2711_bootloader_config.md) and change the EEPROM boot order to `0xf21`, which means try booting from SD first, then try the network.
+
+To enable the EEPROM on the Pi to support network booting, we must update it to the latest version.
+Visit the [release](https://github.com/raspberrypi/rpi-eeprom/releases) page and grab the latest `rpi-boot-eeprom-recovery-*-network.zip` (at the time of writing, v2021.04.29-138a1 was used).
+Put this on an SD card and plug it into the Pi.
+The Pi's status light will flash rapidly after a few seconds; this indicates that the EEPROM has been updated.
+
+This operation needs to be done once per Pi.
+
+### Serial number
+
+Power on the Pi without an SD card in it and hook it up to a monitor; you will be greeted with the boot screen.
+On this screen you will find some information about the Pi.
+For this guide, we are only interested in the serial number.
+The first line under the Pi logo will be something like the following:
+
+`board: xxxxxx `
+
+Write down the 8-character serial.
+
+### talos-systems/pkgs
+
+Clone the [talos-systems/pkgs](https://github.com/talos-systems/pkgs) repo.
+Create the folders `raspberrypi4-uefi` and `raspberrypi4-uefi/serials`.
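+
+For example, assuming you are working in a fresh checkout of the repo, these steps could look like:
+
+```bash
+git clone https://github.com/talos-systems/pkgs.git
+cd pkgs
+mkdir -p raspberrypi4-uefi/serials
+```
+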
+Create a file `raspberrypi4-uefi/pkg.yaml` containing the following: + +```yaml +name: raspberrypi4-uefi +variant: alpine +install: + - unzip +steps: +# {{ if eq .ARCH "aarch64" }} This in fact is YAML comment, but Go templating instruction is evaluated by bldr restricting build to arm64 only + - sources: + - url: https://github.com/pftf/RPi4/releases/download/v1.26/RPi4_UEFI_Firmware_v1.26.zip # <-- update version NR accordingly. + destination: RPi4_UEFI_Firmware.zip + sha256: d6db87484dd98dfbeb64eef203944623130cec8cb71e553eab21f8917e0285f7 + sha512: 96a71086cdd062b51ef94726ebcbf15482b70c56262555a915499bafc04aff959d122410af37214760eda8534b58232a64f6a8a0a8bb99aba6de0f94c739fe98 + prepare: + - | + unzip RPi4_UEFI_Firmware.zip + rm RPi4_UEFI_Firmware.zip + mkdir /rpi4 + mv ./* /rpi4 + install: + - | + mkdir /tftp + ls /pkg/serials | while read serial; do mkdir /tftp/$serial && cp -r /rpi4/* /tftp/$serial && cp -r /pkg/serials/$serial/* /tftp/$serial/; done +# {{ else }} + - install: + - | + mkdir -p /tftp +# {{ end }} +finalize: + - from: / + to: / +``` + +## UEFI / RPi4 + +Now that the EEPROM can network boot, we need to prepare the structure of our +boot folder. +Essentially what the bootloader will do is look for this folder +on the network rather than on the SD card. + +Visit the [release page of RPi4](https://github.com/pftf/RPi4/releases) and grab +the latest `RPi4_UEFI_Firmware_v*.zip` (at the time of writing, v1.26 was used). +Extract the zip into a folder, the structure will look like the following: + +```bash +. +├── RPI_EFI.fd +├── RPi4_UEFI_Firmware_v1.26.zip +├── Readme.md +├── bcm2711-rpi-4-b.dtb +├── bcm2711-rpi-400.dtb +├── bcm2711-rpi-cm4.dtb +├── config.txt +├── firmware +│   ├── LICENCE.txt +│   ├── Readme.txt +│   ├── brcmfmac43455-sdio.bin +│   ├── brcmfmac43455-sdio.clm_blob +│   └── brcmfmac43455-sdio.txt +├── fixup4.dat +├── overlays +│   └── miniuart-bt.dtbo +└── start4.elf +``` + +As a one time operation, we need to configure UEFI to do network booting by +default, remove the 3gb mem limit if it's set and optionally set the CPU clock to +max. +Take these files and put them on the SD card and boot the Pi. +You will see the Pi logo, and the option to hit `esc`. + +### Remove 3GB mem limit + +1. From the home page, visit "Device Manager". +2. Go down to "Raspberry Pi Configuration" and open that menu. +3. Go to "Advanced Configuration". +4. Make sure the option "Limit RAM to 3 GB" is set to `Disabled`. + +### Change CPU to Max (optionally) + +1. From the home page, visit "Device Manager". +2. Go down to "Raspberry Pi Configuration" and open that menu. +3. Go to "CPU Configuration". +4. Change CPU clock to `Max`. + +## Change boot order + +1. From the home page, visit "Boot Maintenance Manager". +2. Go to "Boot Options". +3. Go to "Change Boot Order". +4. Make sure that `UEFI PXEv4` is the first boot option. + +### Persisting changes + +Now that we have made the changes above, we need to persist these changes. +Go back to the home screen and hit `reset` to save the changes to disk. + +When you hit `reset`, the settings will be saved to the `RPI_EFI.fd` file on the +SD card. +This is where we will run into a limitation that is explained in the +following issue: [pftf/RPi4#59](https://github.com/pftf/RPi4/issues/59). +What this mean is that we need to create a `RPI_EFI.fd` file for each Pi that we want to use as server. 
+This is because the MAC address is also stored in the `RPI_EFI.fd` file, +which makes it invalid when you try to use it in a different Pi. + +Plug the SD card back into your computer and extract the `RPI_EFI.fd` file from +it and place it into the `raspberrypi4-uefi/serials//`. +The dir should look like this: + +```bash +raspberrypi4-uefi/ +├── pkg.yaml +└── serials + └─── XXXXXXXX + └── RPI_EFI.fd +``` + +## Build the image with the boot folder contents + +Now that we have the `RPI_EFI.fd` of our Pi in the correct location, we must now +build a docker image containing the boot folder for the EEPROM. +To do this, run the following command in the pkgs repo: + +`make PLATFORM=linux/arm64 USERNAME=$USERNAME PUSH=true TARGETS=raspberrypi4-uefi` + +This will build and push the following image: +`ghcr.io/$USERNAME/raspberrypi4-uefi:` + +_If you need to change some other settings like registry etc, have a look in the +Makefile to see the available variables that you can override._ + +The content of the `/tftp` folder in the image will be the following: + +```bash +XXXXXXXX +├── RPI_EFI.fd +├── Readme.md +├── bcm2711-rpi-4-b.dtb +├── bcm2711-rpi-400.dtb +├── bcm2711-rpi-cm4.dtb +├── config.txt +├── firmware +│   ├── LICENCE.txt +│   ├── Readme.txt +│   ├── brcmfmac43455-sdio.bin +│   ├── brcmfmac43455-sdio.clm_blob +│   └── brcmfmac43455-sdio.txt +├── fixup4.dat +├── overlays +│   └── miniuart-bt.dtbo +└── start4.elf +``` + +## Patch metal controller + +To enable the 2 boot process, we need to include this EEPROM boot folder into +the sidero's tftp folder. +To achieve this, we will use an init container using +the image we created above to copy the contents of it into the tftp folder. + +Create a file `patch.yaml` with the following contents: + +```yaml +spec: + template: + spec: + volumes: + - name: tftp-folder + emptyDir: {} + initContainers: + - image: ghcr.io//raspberrypi4-uefi:v # <-- change accordingly. + imagePullPolicy: Always + name: tftp-folder-setup + command: + - cp + args: + - -r + - /tftp + - /var/lib/sidero/ + volumeMounts: + - mountPath: /var/lib/sidero/tftp + name: tftp-folder + containers: + - name: manager + volumeMounts: + - mountPath: /var/lib/sidero/tftp + name: tftp-folder +``` + +Followed by this command to apply the patch: + +```bash +kubectl -n sidero-system patch deployments.apps sidero-controller-manager --patch "$(cat patch.yaml)" +``` + +## Profit + +With the patched metal controller, you should now be able to register the Pi4 to +sidero by just connecting it to the network. +From this point you can continue with the [bootstrapping guide](../../guides/bootstrapping#register-the-servers). diff --git a/website/content/v0.6/Guides/sidero-on-rpi4.md b/website/content/v0.6/Guides/sidero-on-rpi4.md new file mode 100644 index 000000000..96c095efe --- /dev/null +++ b/website/content/v0.6/Guides/sidero-on-rpi4.md @@ -0,0 +1,158 @@ +--- +description: "Running Sidero on Raspberry Pi 4 to provision bare-metal servers." +title: Sidero on Raspberry Pi 4 +weight: 7 +--- + +Sidero doesn't require a lot of computing resources, so SBCs are a perfect fit to run +the Sidero management cluster. +In this guide, we are going to install Talos on Raspberry Pi4, deploy Sidero and other CAPI components. + +## Prerequisites + +Please see Talos documentation for additional information on [installing Talos on Raspberry Pi4](https://www.talos.dev/docs/v0.13/single-board-computers/rpi_4/). 
+ +Download the `clusterctl` CLI from [CAPI releases](https://github.com/kubernetes-sigs/cluster-api/releases). +The minimum required version is 0.4.3. + +## Installing Talos + +Prepare the SD card with the Talos RPi4 image, and boot the RPi4. +Talos should drop into maintenance mode printing the acquired IP address. +Record the IP address as the environment variable `SIDERO_ENDPOINT`: + +```bash +export SIDERO_ENDPOINT=192.168.x.x +``` + +> Note: it makes sense to transform DHCP lease for RPi4 into a static reservation so that RPi4 always has the same IP address. + +Generate Talos machine configuration for a single-node cluster: + +```bash +talosctl gen config --config-patch='[{"op": "add", "path": "/cluster/allowSchedulingOnMasters", "value": true},{"op": "replace", "path": "/machine/install/disk", "value": "/dev/mmcblk0"}]' rpi4-sidero https://${SIDERO_ENDPOINT}:6443/ +``` + +Submit the generated configuration to Talos: + +```bash +talosctl apply-config --insecure -n ${SIDERO_ENDPOINT} -f controlplane.yaml +``` + +Merge client configuration `talosconfig` into default `~/.talos/config` location: + +```bash +talosctl config merge talosconfig +``` + +Update default endpoint and nodes: + +```bash +talosctl config endpoints ${SIDERO_ENDPOINT} +talosctl config nodes ${SIDERO_ENDPOINT} +``` + +You can verify that Talos has booted by running: + +```bash +$ talosctl version +talosctl version +Client: + Tag: v0.10.3 + SHA: 21018f28 + Built: + Go version: go1.16.3 + OS/Arch: linux/amd64 + +Server: + NODE: 192.168.0.31 + Tag: v0.10.3 + SHA: 8f90c6a8 + Built: + Go version: go1.16.3 + OS/Arch: linux/arm64 +``` + +Bootstrap the etcd cluster: + +```bash +talosctl bootstrap +``` + +At this point, Kubernetes is bootstrapping, and it should be available once all the images are fetched. 
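+
+Optionally, wait for the cluster to report healthy before continuing (a sketch; depending on the Talos version you may need to point the check at the node explicitly):
+
+```bash
+talosctl health --control-plane-nodes ${SIDERO_ENDPOINT}
+```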
+ +Fetch the `kubeconfig` from the cluster with: + +```bash +talosctl kubeconfig +``` + +You can watch the bootstrap progress by running: + +```bash +talosctl dmesg -f +``` + +Once Talos prints `[talos] boot sequence: done`, Kubernetes should be up: + +```bash +kubectl get nodes +``` + +## Installing Sidero + +Install Sidero with host network mode, exposing the endpoints on the node's address: + +```bash +SIDERO_CONTROLLER_MANAGER_HOST_NETWORK=true SIDERO_CONTROLLER_MANAGER_DEPLOYMENT_STRATEGY=Recreate SIDERO_CONTROLLER_MANAGER_API_ENDPOINT=${SIDERO_IP} clusterctl init -i sidero -b talos -c talos +``` + +Watch the progress of installation with: + +```bash +watch -n 2 kubectl get pods -A +``` + +Once images are downloaded, all pods should be in running state: + +```bash +$ kubectl get pods -A +NAMESPACE NAME READY STATUS RESTARTS AGE +cabpt-system cabpt-controller-manager-6458494888-d7lnm 1/1 Running 0 29m +cacppt-system cacppt-controller-manager-f98854db8-qgkf9 1/1 Running 0 29m +capi-system capi-controller-manager-58f797cb65-8dwpz 2/2 Running 0 30m +capi-webhook-system cabpt-controller-manager-85fd964c9c-ldzb6 1/1 Running 0 29m +capi-webhook-system cacppt-controller-manager-75c479b7f-5hw89 1/1 Running 0 29m +capi-webhook-system capi-controller-manager-7d596cc4cb-kjrfk 2/2 Running 0 30m +capi-webhook-system caps-controller-manager-79664cf677-zqbvw 1/1 Running 0 29m +cert-manager cert-manager-86cb5dcfdd-v86wr 1/1 Running 0 31m +cert-manager cert-manager-cainjector-84cf775b89-swk25 1/1 Running 0 31m +cert-manager cert-manager-webhook-7f9f4f8dcb-29xm4 1/1 Running 0 31m +kube-system coredns-fcc4c97fb-wkxkg 1/1 Running 0 35m +kube-system coredns-fcc4c97fb-xzqzj 1/1 Running 0 35m +kube-system kube-apiserver-talos-192-168-0-31 1/1 Running 0 33m +kube-system kube-controller-manager-talos-192-168-0-31 1/1 Running 0 33m +kube-system kube-flannel-qmlw6 1/1 Running 0 34m +kube-system kube-proxy-j24hg 1/1 Running 0 34m +kube-system kube-scheduler-talos-192-168-0-31 1/1 Running 0 33m +``` + +Verify Sidero installation and network setup with: + +```bash +$ curl -I http://${SIDERO_ENDPOINT}:8081/tftp/ipxe.efi +HTTP/1.1 200 OK +Accept-Ranges: bytes +Content-Length: 1020416 +Content-Type: application/octet-stream +Last-Modified: Thu, 03 Jun 2021 15:40:58 GMT +Date: Thu, 03 Jun 2021 15:41:51 GMT +``` + +Now Sidero is installed, and it is ready to be used. +Configure your DHCP server to PXE boot your bare metal servers from `$SIDERO_ENDPOINT` (see [Bootstrapping guide](../bootstrapping/) on DHCP configuration). + +## Backup and Recovery + +SD cards are not very reliable, so make sure you are taking regular [etcd backups](https://www.talos.dev/docs/v0.13/guides/disaster-recovery/#backup), +so that you can [recover](https://www.talos.dev/docs/v0.13/guides/disaster-recovery/#recovery) your Sidero installation in case of data loss. diff --git a/website/content/v0.6/Guides/upgrades.md b/website/content/v0.6/Guides/upgrades.md new file mode 100644 index 000000000..645d3fbbe --- /dev/null +++ b/website/content/v0.6/Guides/upgrades.md @@ -0,0 +1,66 @@ +--- +description: "A guide describing upgrades" +title: "Upgrading" +weight: 5 +--- + +Upgrading a running workload cluster or management plane is the same process as describe in the Talos documentation. + +To upgrade the Talos OS, see [here](https://www.talos.dev/docs/v0.13/guides/upgrading-talos). + +In order to upgrade Kubernetes itself, see [here](https://www.talos.dev/docs/v0.13/guides/upgrading-kubernetes/). 
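+
+As a rough sketch only (the node address, versions, and installer image below are placeholders; exact flags can differ between Talos versions, so follow the linked guides for the authoritative steps):
+
+```bash
+# upgrade Talos itself, one node at a time
+talosctl upgrade --nodes 172.16.200.11 --image ghcr.io/talos-systems/installer:v0.13.4
+
+# upgrade the Kubernetes components, driven through a control plane node
+talosctl --nodes 172.16.200.11 upgrade-k8s --from 1.21.5 --to 1.22.2
+```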
+ +## Upgrading Talos 0.8 -> 0.9 + +It is important, however, to take special consideration for upgrades of the Talos v0.8.x series to v0.9.x. +Because of the move from self-hosted control plane to static pods, some certificate information has changed that needs to be manually updated. +The steps are as follows: + +- Upgrade a single control plane node to the v0.9.x series using the upgrade instructions above. +upgrade + +- After upgrade, carry out a `talosctl convert-k8s` to move from the self-hosted control plane to static pods. + +- Targeting the upgraded node, issue `talosctl read -n /system/state/config.yaml` and copy out the `cluster.aggregatorCA` and `cluster.serviceAccount` sections. + +- In the management cluster, issue `kubectl edit secret -talos`. + +- While in editing view, copy the `data.certs` field and decode it with `echo '' | base64 -d` + +> Note: It may also be a good idea to copy the secret in its entirety as a backup. +> This can be done with a simple `kubectl get secret -talos -o yaml`. + +- Copying the output above to a text editor, update the aggregator and service account sections with the certs and keys copied previously and save it. +The resulting file should look like: + +```yaml +admin: + crt: xxx + key: xxx +etcd: + crt: xxx + key: xxx +k8s: + crt: xxx + key: xxx +k8saggregator: + crt: xxx + key: xxx +k8sserviceaccount: + key: xxx +os: + crt: xxx + key: xxx +``` + +- Re-encode the data with `cat | base64 | tr -d '\n'` + +- With the secret still open for editing, update the `data.certs` field to contain the new base64 data. + +- Edit the cluster's TalosControlPlane resource with `kubectl edit tcp `. +Update the `spec.controlPlaneConfig.[controlplane,init].talosVersion` fields to be `v0.9`. + +- Edit any TalosConfigTemplate resources and update `spec.template.spec.talosVersion` to be the same value. + +- At this point, any new controlplane or worker machines should receive the newer machine config format and join the cluster successfully. +You can also proceed to upgrade existing nodes. diff --git a/website/content/v0.6/Overview/_index.md b/website/content/v0.6/Overview/_index.md new file mode 100644 index 000000000..6501696f4 --- /dev/null +++ b/website/content/v0.6/Overview/_index.md @@ -0,0 +1,4 @@ +--- +title: "Overview" +weight: 10 +--- \ No newline at end of file diff --git a/website/content/v0.6/Overview/architecture.md b/website/content/v0.6/Overview/architecture.md new file mode 100644 index 000000000..415febffd --- /dev/null +++ b/website/content/v0.6/Overview/architecture.md @@ -0,0 +1,11 @@ +--- +description: "" +weight: 30 +title: "Architecture" +--- + +The overarching architecture of Sidero centers around a "management plane". +This plane is expected to serve as a single interface upon which administrators can create, scale, upgrade, and delete Kubernetes clusters. 
+At a high level view, the management plane + created clusters should look something like: + +![Alternative text](/images/dc-view.png) diff --git a/website/content/v0.6/Overview/installation.md b/website/content/v0.6/Overview/installation.md new file mode 100644 index 000000000..82d5e3f3c --- /dev/null +++ b/website/content/v0.6/Overview/installation.md @@ -0,0 +1,42 @@ +--- +description: "" +weight: 20 +title: Installation +--- + +To install Sidero and the other Talos providers, simply issue: + +```bash +clusterctl init -b talos -c talos -i sidero +``` + +Sidero supports several variables to configure the installation, these variables can be set either as environment +variables or as variables in the `clusterctl` configuration: + +- `SIDERO_CONTROLLER_MANAGER_HOST_NETWORK` (`false`): run `sidero-controller-manager` on host network +- `SIDERO_CONTROLLER_MANAGER_DEPLOYMENT_STRATEGY` (`RollingUpdate`): strategy to use when updating `sidero-controller-manager`, use `Recreate` when using a single node and `SIDERO_CONTROLLER_MANAGER_HOST_NETWORK` is `true` +- `SIDERO_CONTROLLER_MANAGER_API_ENDPOINT` (empty): specifies the IP address controller manager API service can be reached on, defaults to the node IP (TCP) +- `SIDERO_CONTROLLER_MANAGER_API_PORT` (8081): specifies the port controller manager can be reached on +- `SIDERO_CONTROLLER_MANAGER_CONTAINER_API_PORT` (8081): specifies the controller manager internal container port +- `SIDERO_CONTROLLER_MANAGER_SIDEROLINK_ENDPOINT` (empty): specifies the IP address SideroLink Wireguard service can be reached on, defaults to the node IP (UDP) +- `SIDERO_CONTROLLER_MANAGER_SIDEROLINK_PORT` (51821): specifies the port SideroLink Wireguard service can be reached on +- `SIDERO_CONTROLLER_MANAGER_EXTRA_AGENT_KERNEL_ARGS` (empty): specifies additional Linux kernel arguments for the Sidero agent (for example, different console settings) +- `SIDERO_CONTROLLER_MANAGER_AUTO_ACCEPT_SERVERS` (`false`): automatically accept discovered servers, by default `.spec.accepted` should be changed to `true` to accept the server +- `SIDERO_CONTROLLER_MANAGER_AUTO_BMC_SETUP` (`true`): automatically attempt to configure the BMC with a `sidero` user that will be used for all IPMI tasks. 
+- `SIDERO_CONTROLLER_MANAGER_INSECURE_WIPE` (`true`): wipe only the first megabyte of each disk on the server, otherwise wipe the full disk +- `SIDERO_CONTROLLER_MANAGER_SERVER_REBOOT_TIMEOUT` (`20m`): timeout for the server reboot (how long it might take for the server to be rebooted before Sidero retries an IPMI reboot operation) +- `SIDERO_CONTROLLER_MANAGER_IPMI_PXE_METHOD` (`uefi`): IPMI boot from PXE method: `uefi` for UEFI boot or `bios` for BIOS boot +- `SIDERO_CONTROLLER_MANAGER_BOOT_FROM_DISK_METHOD` (`ipxe-exit`): configures the way Sidero forces server to boot from disk when server hits iPXE server after initial install: `ipxe-exit` returns iPXE script with `exit` command, `http-404` returns HTTP 404 Not Found error, `ipxe-sanboot` uses iPXE `sanboot` command to boot from the first hard disk (can be also configured on `ServerClass`/`Server` method) + +Sidero provides three endpoints which should be made available to the infrastructure: + +- TCP port 8081 which provides combined iPXE, metadata and gRPC service (external endpoint should be specified as `SIDERO_CONTROLLER_MANAGER_API_ENDPOINT` and `SIDERO_CONTROLLER_MANAGER_API_PORT`) +- UDP port 69 for the TFTP service (DHCP server should point the nodes to PXE boot from that IP) +- UDP port 51821 for the SideroLink Wireguard service (external endpoint should be specified as `SIDERO_CONTROLLER_MANAGER_SIDEROLINK_ENDPOINT` and `SIDERO_CONTROLLER_MANAGER_SIDEROLINK_PORT`) + +These endpoints could be exposed to the infrastructure using different strategies: + +- running `sidero-controller-manager` on the host network. +- using Kubernetes load balancers (e.g. MetalLB), ingress controllers, etc. + +> Note: If you want to run `sidero-controller-manager` on the host network using port different from `8081` you should set both `SIDERO_CONTROLLER_MANAGER_API_PORT` and `SIDERO_CONTROLLER_MANAGER_CONTAINER_API_PORT` to the same value. diff --git a/website/content/v0.6/Overview/introduction.md b/website/content/v0.6/Overview/introduction.md new file mode 100755 index 000000000..a29917c22 --- /dev/null +++ b/website/content/v0.6/Overview/introduction.md @@ -0,0 +1,29 @@ +--- +description: "" +weight: 10 +title: Introduction +--- + +Sidero ("Iron" in Greek) is a project created by the [Sidero Labs](https://www.SideroLabs.com/) team. +Sidero Metal provides lightweight, composable tools that can be used to create bare-metal [Talos Linux](https://www.talos.dev) + Kubernetes clusters. +These tools are built around the Cluster API project. + +Because of the design of Cluster API, there is inherently a "chicken and egg" problem: you need an existing Kubernetes cluster in order to provision the management plane, that can then provision more clusters. +The initial management plane cluster that runs the Sidero Metal provider does not need to be based on Talos Linux - although it is recommended for security and stability reasons. +The [Getting Started](../../getting-started/) guide will walk you through installing Sidero Metal either on an existing cluster, or by quickly creating a docker based cluster used to bootstrap the process. 
+ +## Overview + +Sidero Metal is currently made up of two components: + +- Metal Controller Manager: Provides custom resources and controllers for managing the lifecycle of metal machines, iPXE server, metadata service, and gRPC API service +- Cluster API Provider Sidero (CAPS): A Cluster API infrastructure provider that makes use of the pieces above to spin up Kubernetes clusters + +Sidero Metal also needs these co-requisites in order to be useful: + +- [Cluster API](https://github.com/kubernetes-sigs/cluster-api) +- [Cluster API Control Plane Provider Talos](https://github.com/talos-systems/cluster-api-control-plane-provider-talos) +- [Cluster API Bootstrap Provider Talos](https://github.com/talos-systems/cluster-api-bootstrap-provider-talos) + +All components mentioned above can be installed using Cluster API's `clusterctl` tool. +See the [Getting Started](../../getting-started/) for more details. diff --git a/website/content/v0.6/Overview/minimum-requirements.md b/website/content/v0.6/Overview/minimum-requirements.md new file mode 100644 index 000000000..6b581ea5c --- /dev/null +++ b/website/content/v0.6/Overview/minimum-requirements.md @@ -0,0 +1,20 @@ +--- +title: System Requirements +--- + +## System Requirements + +Most of the time, Sidero does very little, so it needs very few resources. +However, since it is in charge of any number of workload clusters, it **should** +be built with redundancy. +It is also common, if the cluster is single-purpose, +to combine the controlplane and worker node roles. +Virtual machines are also +perfectly well-suited for this role. + +Minimum suggested dimensions: + +- Node count: 3 +- Node RAM: 4GB +- Node CPU: ARM64 or x86-64 class +- Node storage: 32GB storage on system disk diff --git a/website/content/v0.6/Overview/resources.md b/website/content/v0.6/Overview/resources.md new file mode 100644 index 000000000..91053b21e --- /dev/null +++ b/website/content/v0.6/Overview/resources.md @@ -0,0 +1,154 @@ +--- +description: "" +weight: 50 +title: Resources +--- + +Sidero, the Talos bootstrap/controlplane providers, and Cluster API each provide several custom resources (CRDs) to Kubernetes. +These CRDs are crucial to understanding the connections between each provider and in troubleshooting problems. +It may also help to look at the [cluster template](https://github.com/talos-systems/sidero/blob/master/templates/cluster-template.yaml) to get an idea of the relationships between these. + +--- + +## Cluster API (CAPI) + +It's worth defining the most basic resources that CAPI provides first, as they are related to several subsequent resources below. + +### `Cluster` + +`Cluster` is the highest level CAPI resource. +It allows users to specify things like network layout of the cluster, as well as contains references to the infrastructure and control plane resources that will be used to create the cluster. + +### `Machines` + +`Machine` represents an infrastructure component hosting a Kubernetes node. +Allows for specification of things like Kubernetes version, as well as contains reference to the infrastructure resource that relates to this machine. + +### `MachineDeployments` + +`MachineDeployments` are similar to a `Deployment` and their relationship to `Pods` in Kubernetes primitives. +A `MachineDeployment` allows for specification of a number of Machine replicas with a given specification. 
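+
+When troubleshooting, it can be handy to list these CAPI resources directly, for example:
+
+```bash
+kubectl get clusters,machinedeployments,machines -A
+```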
+ +--- + +## Cluster API Bootstrap Provider Talos (CABPT) + +### `TalosConfigs` + +The `TalosConfig` resource allows a user to specify the type (init, controlplane, join) for a given machine. +The bootstrap provider will then generate a Talos machine configuration for that machine. +This resource also provides the ability to pass a full, pre-generated machine configuration. +Finally, users have the ability to pass `configPatches`, which are applied to edit a generate machine configuration with user-defined settings. +The `TalosConfig` corresponds to the `bootstrap` sections of Machines, `MachineDeployments`, and the `controlPlaneConfig` section of `TalosControlPlanes`. + +### `TalosConfigTemplates` + +`TalosConfigTemplates` are similar to the `TalosConfig` above, but used when specifying a bootstrap reference in a `MachineDeployment`. + +--- + +## Cluster API Control Plane Provider Talos (CACPPT) + +### `TalosControlPlanes` + +The control plane provider presents a single CRD, the `TalosControlPlane`. +This resource is similar to `MachineDeployments`, but is targeted exclusively for the Kubernetes control plane nodes. +The `TalosControlPlane` allows for specification of the number of replicas, version of Kubernetes for the control plane nodes, references to the infrastructure resource to use (`infrastructureTemplate` section), as well as the configuration of the bootstrap data via the `controlPlaneConfig` section. +This resource is referred to by the CAPI Cluster resource via the `controlPlaneRef` section. + +--- + +## Sidero + +### Cluster API Provider Sidero (CAPS) + +#### `MetalClusters` + +A `MetalCluster` is Sidero's view of the cluster resource. +This resource allows users to define the control plane endpoint that corresponds to the Kubernetes API server. +This resource corresponds to the `infrastructureRef` section of Cluster API's `Cluster` resource. + +#### `MetalMachines` + +A `MetalMachine` is Sidero's view of a machine. +Allows for reference of a single server or a server class from which a physical server will be picked to bootstrap. + +`MetalMachine` provides a set of statuses describing the state (available with SideroLink, requires Talos >= 0.14): + +```yaml +status: + addresses: + - address: 172.25.0.5 + type: InternalIP + - address: pxe-2 + type: Hostname + conditions: + - lastTransitionTime: "2022-02-11T14:20:42Z" + message: 'Get ... 
connection refused' + reason: ProviderUpdateFailed + severity: Warning + status: "False" + type: ProviderSet + - lastTransitionTime: "2022-02-11T12:48:35Z" + status: "True" + type: TalosConfigLoaded + - lastTransitionTime: "2022-02-11T12:48:35Z" + status: "True" + type: TalosConfigValidated + - lastTransitionTime: "2022-02-11T12:48:35Z" + status: "True" + type: TalosInstalled +``` + +Statuses: + +- `addresses` lists the current IP addresses and hostname of the node, `addresses` are updated when the node addresses are changed +- `conditions`: + - `ProviderSet`: captures the moment infrastrucutre provider ID is set in the `Node` specification; depends on workload cluster control plane availability + - `TalosConfigLoaded`: Talos successfully loaded machine configuration from Sidero; if this condition indicates a failure, check `sidero-controller-manager` logs + - `TalosConfigValidated`: Talos successfully validated machine configuration; a failure in this condition indicates that the machine config is malformed + - `TalosInstalled`: Talos was successfully installed to disk + +#### `MetalMachineTemplates` + +A `MetalMachineTemplate` is similar to a `MetalMachine` above, but serves as a template that is reused for resources like `MachineDeployments` or `TalosControlPlanes` that allocate multiple `Machines` at once. + +#### `ServerBindings` + +`ServerBindings` represent a one-to-one mapping between a Server resource and a `MetalMachine` resource. +A `ServerBinding` is used internally to keep track of servers that are allocated to a Kubernetes cluster and used to make decisions on cleaning and returning servers to a `ServerClass` upon deallocation. + +### Metal Controller Manager + +#### `Environments` + +These define a desired deployment environment for Talos, including things like which kernel to use, kernel args to pass, and the initrd to use. +Sidero allows you to define a default environment, as well as other environments that may be specific to a subset of nodes. +Users can override the environment at the `ServerClass` or `Server` level, if you have requirements for different kernels or kernel parameters. + +See the [Environments](../../resource-configuration/environments/) section of our Configuration docs for examples and more detail. + +#### `Servers` + +These represent physical machines as resources in the management plane. +These `Servers` are created when the physical machine PXE boots and completes a "discovery" process in which it registers with the management plane and provides SMBIOS information such as the CPU manufacturer and version, and memory information. + +See the [Servers](../../resource-configuration/servers/) section of our Configuration docs for examples and more detail. + +#### `ServerClasses` + +`ServerClasses` are a grouping of the `Servers` mentioned above, grouped to create classes of servers based on Memory, CPU or other attributes. +These can be used to compose a bank of `Servers` that are eligible for provisioning. + +See the [ServerClasses](../../resource-configuration/serverclasses/) section of our Configuration docs for examples and more detail. + +### Sidero Controller Manager + +While the controller does not present unique CRDs within Kubernetes, it's important to understand the metadata resources that are returned to physical servers during the boot process. + +#### Metadata + +The Sidero controller manager server may be familiar to you if you have used cloud environments previously. 
+Using Talos machine configurations created by the Talos Cluster API bootstrap provider, along with patches specified by editing `Server`/`ServerClass` resources or `TalosConfig`/`TalosControlPlane` resources, metadata is returned to servers who query the controller manager at boot time. + +See the [Metadata](../../resource-configuration/metadata/) section of our Configuration docs for examples and more detail. diff --git a/website/content/v0.6/Overview/siderolink.md b/website/content/v0.6/Overview/siderolink.md new file mode 100644 index 000000000..c970e1fcc --- /dev/null +++ b/website/content/v0.6/Overview/siderolink.md @@ -0,0 +1,124 @@ +--- +description: "" +weight: 40 +title: SideroLink +--- + +SideroLink provides an overlay Wireguard point-to-point connection from every Talos machine to the Sidero. +Sidero provisions each machine with a unique IPv6 address and Wireguard key for the SideroLink connection. + +> Note: SideroLink is only supported with Talos >= 0.14. +> +> SideroLink doesn't provide a way for workload machines to communicate with each other, a connection is only +> point-to-point. + +SideroLink connection is both encrypted and authenticated, so Sidero uses that to map data streams coming from the machines +to a specific `ServerBinding`, `MetalMachine`, `Machine` and `Cluster`. + +Talos node sends two streams over the SideroLink connection: kernel logs (dmesg) and Talos event stream. +SideroLink is enabled automatically by Sidero when booting Talos. + +## Kernel Logs + +Kernel logs (`dmesg`) are streamed in real time from the Talos nodes to the `sidero-controller-manager` over SideroLink connection. +Log streaming starts when the kernel passes control to the `init` process, so kernel boot time logs will only be available when control +is passed to the userland. + +Logs can be accessed by accessing the logs of the `serverlogs` container of the `sidero-controller-manager` pod: + +```bash +$ kubectl -n sidero-system logs deployment/sidero-controller-manager -c serverlogs -f +{"clock":8576583,"cluster":"management-cluster","facility":"user","machine":"management-cluster-cp-ddgsw","metal_machine":"management-cluster-cp-vrff4","msg":"[talos] phase mountState (6/13): 1 tasks(s)\n","namespace":"default","priority":"warning","seq":665,"server_uuid":"6b121f82-24a8-4611-9d23-fa1a5ba564f0","talos-level":"warn","talos-time":"2022-02-11T12:42:02.74807823Z"} +... 
+``` + +The format of the message is the following: + +```json +{ + "clock": 8576583, + "cluster": "management-cluster", + "facility": "user", + "machine": "management-cluster-cp-ddgsw", + "metal_machine": "management-cluster-cp-vrff4", + "msg": "[talos] phase mountState (6/13): 1 tasks(s)\n", + "namespace": "default", + "priority": "warning", + "seq": 665, + "server_uuid": "6b121f82-24a8-4611-9d23-fa1a5ba564f0", + "talos-level": "warn", + "talos-time": "2022-02-11T12:42:02.74807823Z" +} +``` + +Kernel fields (see [Linux documentation](https://www.kernel.org/doc/Documentation/ABI/testing/dev-kmsg) for details): + +- `clock` is the kernel timestamp relative to the boot time +- `facility` of the message +- `msg` is the actual log message +- `seq` is the kernel log sequence +- `priority` is the message priority + +Talos-added fields: + +- `talos-level` is the translated `priority` into standard logging levels +- `talos-time` is the timestamp of the log message (accuracy of the timestamp depends on time sync) + +Sidero-added fields: + +- `server_uuid` is the `name` of the matching `Server` and `ServerBinding` resources +- `namespace` is the namespace of the `Cluster`, `MetalMachine` and `Machine` +- `cluster`, `metal_machine` and `machine` are the names of the matching `Cluster`, `MetalMachine` and `Machine` resources + +It might be a good idea to send container logs to some log aggregation system and filter the logs for a cluster or a machine. + +Quick filtering for a specific server: + +```bash +kubectl -n sidero-system logs deployment/sidero-controller-manager -c serverlogs | jq -R 'fromjson? | select(.server_uuid == "b4e677d9-b59b-4c1c-925a-f9d9ce049d79")' +``` + +## Talos Events + +Talos delivers system events over the SideroLink connection to the `sidero-link-manager` pod. +These events can be accessed with `talosctl events` command. +Events are mostly used to update `ServerBinding`/`MetalMachine` statuses, but they can be also seen in the logs of the `serverevents` container: + +```bash +$ kubectl -n sidero-system logs deployment/sidero-controller-manager -c serverevents -f +{"level":"info","ts":1644853714.2700942,"caller":"events-manager/adapter.go:153","msg":"incoming event","component":"sink","node":"[fdae:2859:5bb1:7a03:3ae3:be30:7ec4:4c09]:44530","id":"c857jkm1jjcc7393cbs0","type":"type.googleapis.com/machine. +AddressEvent","server_uuid":"b4e677d9-b59b-4c1c-925a-f9d9ce049d79","cluster":"management-cluster","namespace":"default","metal_machine":"management-cluster-cp-47lll","machine":"management-cluster-cp-7mpsh","hostname":"pxe-2","addresses":"172.25.0.5"} +``` + +## MetalMachine Conditions + +Sidero updates the statuses of `ServerBinding`/`MetalMachine` resources based on the events received from Talos node: + +- current addresses of the node +- statuses of machine configuration loading and validation, installation status + +See [Resources](../resources/) for details. + +## SideroLink State + +State of the SideroLink connection is kept in the `ServerBinding` resource: + +```yaml +spec: + siderolink: + address: fdae:2859:5bb1:7a03:3ae3:be30:7ec4:4c09/64 + publicKey: XIBT49g9xCoBvyb/x36J+ASlQ4qaxXMG20ZgKbBbfE8= +``` + +Installation-wide SideroLink state is kept in the `siderolink` `Secret` resource: + +```bash +$ kubectl get secrets siderolink -o yaml +apiVersion: v1 +data: + installation-id: QUtmZGFmVGJtUGVFcWp0RGMzT1BHSzlGcmlHTzdDQ0JCSU9aRzRSamdtWT0= + private-key: ME05bHhBd3JwV0hDczhNbm1aR3RDL1ZjK0ZSUFM5UzQwd25IU00wQ3dHOD0= +... 
+``` + +Key `installation-id` is used to generate unique SideroLink IPv6 addresses, and `private-key` is the Wireguard key of Sidero. diff --git a/website/content/v0.6/Overview/whatsnew.md b/website/content/v0.6/Overview/whatsnew.md new file mode 100644 index 000000000..b065277d4 --- /dev/null +++ b/website/content/v0.6/Overview/whatsnew.md @@ -0,0 +1,65 @@ +--- +description: "" +weight: 15 +title: What's New +--- + +### Cluster API v1.x (v1beta1) + +This release of Sidero brings compatibility with CAPI v1.x (v1beta1). + +### Cluster Template + +Sidero ships with new cluster template without `init` nodes. +This template is only compatible with Talos >= 0.14 (it requires SideroLink feature which was introduced in Talos 0.14). + +On upgrade, Sidero supports clusters running Talos < 0.14 if they were created before the upgrade. +Use [legacy template](https://github.com/talos-systems/sidero/blob/release-0.4/templates/cluster-template.yaml) to deploy clusters with Talos < 0.14. + +### New `MetalMachines` Conditions + +New set of conditions is now available which can simplify cluster troubleshooting: + +- `TalosConfigLoaded` is set to false when the config load has failed. +- `TalosConfigValidated` is set to false when the config validation +fails on the node. +- `TalosInstalled` is set to true/false when talos installer finishes. + +Requires Talos >= v0.14. + +### Machine Addresses + +Sidero now populates `MetalMachine` addresses with the ones discovered from Siderolink server events. +Which is then propagated to CAPI `Machine` resources. + +Requires Talos >= v0.14. + +### SideroLink + +Sidero now connects to all servers using SideroLink (available only with Talos >= 0.14). +This enables streaming of kernel logs and events back to Sidero. + +All server logs can now be viewed by getting logs of one of the container of the `sidero-controller-manager`: + +```bash +kubectl logs -f -n sidero-system deployment/sidero-controller-manager -c serverlogs +``` + +Events: + +```bash +kubectl logs -f -n sidero-system deployment/sidero-controller-manager -c serverevents +``` + +### iPXE Boot From Disk Method + +iPXE boot from disk method can now be set not only on the global level, but also in the Server and ServerClass specs. + +### IPMI PXE Method + +IPMI PXE method (UEFI, BIOS) can now be configured with `SIDERO_CONTROLLER_MANAGER_IPMI_PXE_METHOD` while installing Sidero. + +### Retry PXE Boot + +Sidero server controller now keeps track of Talos installation progress. +Now the node will be PXE booted until Talos installation succeeds. diff --git a/website/content/v0.6/Resource Configuration/_index.md b/website/content/v0.6/Resource Configuration/_index.md new file mode 100644 index 000000000..15e26c409 --- /dev/null +++ b/website/content/v0.6/Resource Configuration/_index.md @@ -0,0 +1,4 @@ +--- +title: "Resource Configuration" +weight: 30 +--- \ No newline at end of file diff --git a/website/content/v0.6/Resource Configuration/environments.md b/website/content/v0.6/Resource Configuration/environments.md new file mode 100644 index 000000000..3545d9f75 --- /dev/null +++ b/website/content/v0.6/Resource Configuration/environments.md @@ -0,0 +1,76 @@ +--- +description: "" +weight: 1 +title: Environments +--- + +Environments are a custom resource provided by the Metal Controller Manager. +An environment is a codified description of what should be returned by the PXE server when a physical server attempts to PXE boot. + +Especially important in the environment types are the kernel args. 
+From here, one can tweak the IP to the metadata server as well as various other kernel options that [Talos](https://www.talos.dev/docs/v0.13/reference/kernel/#commandline-parameters) and/or the Linux kernel supports. + +Environments can be supplied to a given server either at the Server or the ServerClass level. +The hierarchy from most to least respected is: + +- `.spec.environmentRef` provided at `Server` level +- `.spec.environmentRef` provided at `ServerClass` level +- `"default"` `Environment` created automatically and modified by an administrator + +A sample environment definition looks like this: + +```yaml +apiVersion: metal.sidero.dev/v1alpha1 +kind: Environment +metadata: + name: default +spec: + kernel: + url: "https://github.com/talos-systems/talos/releases/download/v0.14.0/vmlinuz-amd64" + sha512: "" + args: + - console=tty0 + - console=ttyS1,115200n8 + - consoleblank=0 + - earlyprintk=ttyS1,115200n8 + - ima_appraise=fix + - ima_hash=sha512 + - ima_template=ima-ng + - init_on_alloc=1 + - initrd=initramfs.xz + - nvme_core.io_timeout=4294967295 + - printk.devkmsg=on + - pti=on + - random.trust_cpu=on + - slab_nomerge= + - talos.platform=metal + initrd: + url: "https://github.com/talos-systems/talos/releases/download/v0.14.0/initramfs-amd64.xz" + sha512: "" +``` + +Example of overriding `"default"` `Environment` at the `Server` level: + +```yaml +apiVersion: metal.sidero.dev/v1alpha1 +kind: Server +... +spec: + environmentRef: + namespace: default + name: boot + ... +``` + +Example of overriding `"default"` `Environment` at the `ServerClass` level: + +```yaml +apiVersion: metal.sidero.dev/v1alpha1 +kind: ServerClass +... +spec: + environmentRef: + namespace: default + name: boot + ... +``` diff --git a/website/content/v0.6/Resource Configuration/metadata.md b/website/content/v0.6/Resource Configuration/metadata.md new file mode 100644 index 000000000..6cdeccdc1 --- /dev/null +++ b/website/content/v0.6/Resource Configuration/metadata.md @@ -0,0 +1,167 @@ +--- +description: "" +weight: 4 +title: Metadata +--- + +The Sidero controller manager manages the Machine metadata. +In terms of Talos (the OS on which the Kubernetes cluster is formed), this is the +"[machine config](https://www.talos.dev/docs/v0.13/reference/configuration/)", +which is used during the automated installation. + +## Talos Machine Configuration + +The configuration of each machine is constructed from a number of sources: + +- The `TalosControlPlane` custom resource for control plane nodes. +- The `TalosConfigTemplate` custom resource. +- The `ServerClass` which was used to select the `Server` into the `Cluster`. +- Any `Server`-specific patches. + +An example usage of setting a virtual IP for the control plane nodes and adding extra `node-labels` to nodes is shown below: + +> Note: because of the way JSON patches work the interface setting also needs to be set in `TalosControlPlane` when defining a Virtual IP. +This experience is not ideal, but will be addressed in a future release. 
+ +*TalosControlPlane* custom resource: + +```yaml +apiVersion: controlplane.cluster.x-k8s.io/v1alpha3 +kind: TalosControlPlane +metadata: + name: workload-cluster + namespace: default +spec: + controlPlaneConfig: + controlplane: + configPatches: + - op: add + path: /machine/network + value: + interfaces: + - interface: eth0 + dhcp: true + vip: + ip: 172.16.200.52 + generateType: controlplane + talosVersion: v0.13 + init: + configPatches: + - op: add + path: /machine/network + value: + interfaces: + - interface: eth0 + dhcp: true + vip: + ip: 172.16.200.52 + generateType: init + talosVersion: v0.13 + infrastructureTemplate: + apiVersion: infrastructure.cluster.x-k8s.io/v1alpha3 + kind: MetalMachineTemplate + name: workload-cluster + replicas: 3 + version: v1.23.0 +``` + +*TalosConfigTemplate* custom resource: + +```yaml +--- +apiVersion: bootstrap.cluster.x-k8s.io/v1alpha3 +kind: TalosConfigTemplate +metadata: + name: workload-cluster + namespace: default +spec: + template: + spec: + generateType: join + talosVersion: v0.13 + configPatches: + - op: add + path: /machine/kubelet + value: + extraArgs: + node-labels: + talos.dev/part-of: cluster/workload-cluster +``` + +and finally in the control plane `ServerClass` custom resource we augment the network information for other interfaces: + +```yaml +--- +apiVersion: metal.sidero.dev/v1alpha1 +kind: ServerClass +metadata: + name: cp.small.x86 +spec: + configPatches: + - op: replace + path: /machine/install/disk + value: /dev/nvme0n1 + - op: add + path: /machine/install/extraKernelArgs + value: + - console=tty0 + - console=ttyS1,115200n8 + - op: add + path: /machine/network/interfaces/- + value: + interface: eth1 + dhcp: true + qualifiers: + cpu: + - version: Intel(R) Xeon(R) E-2124G CPU @ 3.40GHz + systemInformation: + - manufacturer: Supermicro + selector: + matchLabels: + metal.sidero.dev/serverclass: cp.small.x86 +``` + +the workload `ServerClass` defines the complete networking config + +```yaml +--- +apiVersion: metal.sidero.dev/v1alpha1 +kind: ServerClass +metadata: + name: general.medium.x86 +spec: + configPatches: + - op: replace + path: /machine/install/disk + value: /dev/nvme1n1 + - op: add + path: /machine/install/extraKernelArgs + value: + - console=tty0 + - console=ttyS1,115200n8 + - op: add + path: /machine/network + value: + interfaces: + - interface: eth0 + dhcp: true + - interface: eth1 + dhcp: true + qualifiers: + cpu: + - version: Intel(R) Xeon(R) E-2136 CPU @ 3.30GHz + systemInformation: + - manufacturer: Supermicro + selector: + matchLabels: + metal.sidero.dev/serverclass: general.medium.x86 +``` + +The base template is constructed from the Talos bootstrap provider, using data from the associated `TalosControlPlane` and `TalosConfigTemplate` manifest. +Then, any configuration patches are applied from the `ServerClass` and `Server`. + +These patches take the form of an [RFC 6902](https://tools.ietf.org/html/rfc6902) JSON (or YAML) patch. +An example of the use of this patch method can be found in [Patching Guide](../../guides/patching/). + +Also note that while a `Server` can be a member of any number of `ServerClass`es, only the `ServerClass` which is used to select the `Server` into the `Cluster` will be used for the generation of the configuration of the `Machine`. +In this way, `Servers` may have a number of different configuration patch sets based on which `Cluster` they are in at any given time. 
diff --git a/website/content/v0.6/Resource Configuration/serverclasses.md b/website/content/v0.6/Resource Configuration/serverclasses.md
new file mode 100644
index 000000000..272bdb858
--- /dev/null
+++ b/website/content/v0.6/Resource Configuration/serverclasses.md
@@ -0,0 +1,117 @@
+---
+description: ""
+weight: 3
+title: Server Classes
+---
+
+Server classes are a way to group distinct server resources.
+The `qualifiers` and `selector` keys allow the administrator to specify criteria upon which to group these servers.
+If both of these keys are missing, the server class matches all servers that it is watching.
+If both of these keys define requirements, these requirements are combined (logical `AND`).
+
+## `selector`
+
+`selector` groups server resources by their labels.
+The [Kubernetes documentation][label-selector-docs] has more information on how to use this field.
+
+## `qualifiers`
+
+There are currently two keys: `cpu` and `systemInformation`.
+Each of these keys accepts a list of entries.
+The top-level keys are a "logical `AND`", while the lists under each key are a "logical `OR`".
+Qualifiers that are not specified are not evaluated.
+
+An example:
+
+```yaml
+apiVersion: metal.sidero.dev/v1alpha1
+kind: ServerClass
+metadata:
+  name: serverclass-sample
+spec:
+  selector:
+    matchLabels:
+      common-label: "true"
+    matchExpressions:
+      - key: zone
+        operator: In
+        values:
+          - central
+          - east
+      - key: environment
+        operator: NotIn
+        values:
+          - prod
+  qualifiers:
+    cpu:
+      - manufacturer: "Intel(R) Corporation"
+        version: "Intel(R) Atom(TM) CPU C3558 @ 2.20GHz"
+      - manufacturer: Advanced Micro Devices, Inc.
+        version: AMD Ryzen 7 2700X Eight-Core Processor
+    systemInformation:
+      - manufacturer: Dell Inc.
+```
+
+Servers are only added to the above class if they:
+
+- match _EITHER_ of the listed CPU entries,
+- _AND_ carry the label key/value in `matchLabels`,
+- _AND_ match the `matchExpressions`.
+
+Additionally, Sidero automatically creates and maintains a server class called `"any"` that includes all (accepted) servers.
+Attempts to add qualifiers to it will be reverted.
+
+[label-selector-docs]: https://kubernetes.io/docs/reference/kubernetes-api/common-definitions/label-selector/
+
+## `configPatches`
+
+Server configs of servers matching a server class can be updated by using the `configPatches` section of the custom resource.
+See [patching](../guides/patching) for more information on how this works.
+
+An example of setting the default install disk for all servers matching a server class:
+
+```yaml
+apiVersion: metal.sidero.dev/v1alpha1
+kind: ServerClass
+...
+spec:
+  configPatches:
+    - op: replace
+      path: /machine/install/disk
+      value: /dev/sda
+```
+
+## Other Settings
+
+### `environmentRef`
+
+Servers from a `ServerClass` can be set to use a specific `Environment` by linking the `Environment` from the `ServerClass`:
+
+```yaml
+apiVersion: metal.sidero.dev/v1alpha1
+kind: ServerClass
+...
+spec:
+  environmentRef:
+    name: production-env
+```
+
+### `bootFromDiskMethod`
+
+The method to exit iPXE network boot to force boot from disk can be configured for all `Server` resources belonging to the `ServerClass`:
+
+```yaml
+apiVersion: metal.sidero.dev/v1alpha1
+kind: ServerClass
+...
+spec:
+  bootFromDiskMethod: ipxe-sanboot
+```
+
+Valid values are:
+
+- `ipxe-exit`
+- `http-404`
+- `ipxe-sanboot`
+
+If not set, the default boot from disk method is used (`SIDERO_CONTROLLER_MANAGER_BOOT_FROM_DISK_METHOD`).
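+
+## Complete Example
+
+Putting these settings together, a single `ServerClass` can combine a `selector`, `qualifiers`, an `environmentRef`, `configPatches`, and a `bootFromDiskMethod`.
+The following is only an illustrative sketch; the class name, label, manufacturer, environment name, and disk path are placeholders:
+
+```yaml
+apiVersion: metal.sidero.dev/v1alpha1
+kind: ServerClass
+metadata:
+  name: example.large.x86
+spec:
+  selector:
+    matchLabels:
+      common-label: "true"
+  qualifiers:
+    systemInformation:
+      - manufacturer: Dell Inc.
+  environmentRef:
+    name: production-env
+  configPatches:
+    - op: replace
+      path: /machine/install/disk
+      value: /dev/sda
+  bootFromDiskMethod: ipxe-exit
+```
+
+Servers must satisfy both the `selector` and the `qualifiers` to be matched by this class, and a server allocated through it will boot the referenced `Environment` and receive the listed config patches.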
diff --git a/website/content/v0.6/Resource Configuration/servers.md b/website/content/v0.6/Resource Configuration/servers.md
new file mode 100644
index 000000000..969991765
--- /dev/null
+++ b/website/content/v0.6/Resource Configuration/servers.md
@@ -0,0 +1,183 @@
+---
+description: ""
+weight: 2
+title: Servers
+---
+
+Servers are the basic resource of bare metal in the Metal Controller Manager.
+These are created by PXE booting the servers and allowing them to send a registration request to the management plane.
+
+An example server may look like the following:
+
+```yaml
+apiVersion: metal.sidero.dev/v1alpha1
+kind: Server
+metadata:
+  name: 00000000-0000-0000-0000-d05099d333e0
+  labels:
+    common-label: "true"
+    zone: east
+    environment: test
+spec:
+  accepted: false
+  configPatches:
+    - op: replace
+      path: /cluster/network/cni
+      value:
+        name: custom
+        urls:
+          - http://192.168.1.199/assets/cilium.yaml
+  cpu:
+    manufacturer: Intel(R) Corporation
+    version: Intel(R) Atom(TM) CPU C3558 @ 2.20GHz
+  system:
+    manufacturer: Dell Inc.
+```
+
+## Installation Disk
+
+An installation disk is required by Talos on bare metal.
+This can be specified in a `configPatch`:
+
+```yaml
+apiVersion: metal.sidero.dev/v1alpha1
+kind: Server
+...
+spec:
+  accepted: false
+  configPatches:
+    - op: replace
+      path: /machine/install/disk
+      value: /dev/sda
+```
+
+The install disk patch can also be set on the `ServerClass`:

+```yaml
+apiVersion: metal.sidero.dev/v1alpha1
+kind: ServerClass
+...
+spec:
+  configPatches:
+    - op: replace
+      path: /machine/install/disk
+      value: /dev/sda
+```
+
+## Server Acceptance
+
+In order for a server to be eligible for consideration, it _must_ be `accepted`.
+This is an important separation point which all `Server`s must pass.
+Before a `Server` is accepted, no write action will be performed against it.
+Thus, it is safe for a computer to be added to a network on which Sidero is operating.
+Sidero will never write to or wipe any disk on a computer which is not marked as `accepted`.
+
+This can be tedious for systems in which all attached computers should be considered to be under the control of Sidero.
+Thus, you may also choose to automatically accept any machine into Sidero on its discovery.
+Please keep in mind that this means that any newly-connected computer **WILL BE WIPED** automatically.
+You can enable auto-acceptance by passing the `--auto-accept-servers=true` flag to `sidero-controller-manager`.
+
+Once accepted, a server will be reset (all disks wiped) and then made available to Sidero.
+
+You should never change an accepted `Server` to be _not_ accepted while it is in use.
+Because servers which are not accepted will not be modified, if a server which
+_was_ accepted is changed to _not_ accepted, the disk will _not_ be wiped upon
+its exit from the cluster.
+
+## IPMI
+
+Sidero can use IPMI information to control `Server` power state, reboot servers, and set boot order.
+
+IPMI information will be, by default, set up automatically if possible as part of the acceptance process.
+In this design, a "sidero" user will be added to the IPMI user list and a randomly generated password will be issued.
+This information is then squirreled away in a Kubernetes secret in the `sidero-system` namespace, with a name format of `-bmc`.
+Users wishing to turn off this feature can pass the `--auto-bmc-setup=false` flag to `sidero-controller-manager`.
+
+IPMI connection information can also be set manually in the `Server` spec after initial registration:
+
+```yaml
+apiVersion: metal.sidero.dev/v1alpha1
+kind: Server
+...
+spec:
+  bmc:
+    endpoint: 10.0.0.25
+    user: admin
+    pass: password
+```
+
+If IPMI information is set, the server boot order can be set to boot from disk, then network; Sidero will switch servers
+to PXE boot when that is required.
+
+Without IPMI info, Sidero can still register servers, wipe them, and provision clusters, but it won't be able to reboot servers once they are removed from the cluster.
+**If IPMI info is not set, servers should be configured to boot first from network, then from disk.**
+
+Sidero can also fetch IPMI credentials via a `Secret` reference:
+
+```yaml
+apiVersion: metal.sidero.dev/v1alpha1
+kind: Server
+...
+spec:
+  bmc:
+    endpoint: 10.0.0.25
+    userFrom:
+      secretKeyRef:
+        name: ipmi-credentials
+        key: username
+    passFrom:
+      secretKeyRef:
+        name: ipmi-credentials
+        key: password
+```
+
+As the `Server` resource is not namespaced, the `Secret` should be created in the `default` namespace.
+
+## Other Settings
+
+### `cordoned`
+
+If `cordoned` is set to `true`, the `Server` is excluded from any `ServerClass` it might match based on qualifiers.
+This means that the `Server` will not be allocated automatically.
+
+A `Server` might be `cordoned` to temporarily take it out of the `ServerClass`, for example to perform hardware maintenance.
+
+```yaml
+apiVersion: metal.sidero.dev/v1alpha1
+kind: Server
+...
+spec:
+  cordoned: true
+```
+
+### `pxeBootAlways`
+
+A `Server` can be forced to boot from the network, even if the OS is already installed, by setting `pxeBootAlways: true`:
+
+```yaml
+apiVersion: metal.sidero.dev/v1alpha1
+kind: Server
+...
+spec:
+  pxeBootAlways: true
+```
+
+### `bootFromDiskMethod`
+
+The method to exit iPXE network boot to force boot from disk can be configured for the `Server`:
+
+```yaml
+apiVersion: metal.sidero.dev/v1alpha1
+kind: Server
+...
+spec:
+  bootFromDiskMethod: ipxe-sanboot
+```
+
+Valid values are:
+
+- `ipxe-exit`
+- `http-404`
+- `ipxe-sanboot`
+
+If not set, the `ServerClass.spec.bootFromDiskMethod` value is used, with a fallback to the default boot from disk method (`SIDERO_CONTROLLER_MANAGER_BOOT_FROM_DISK_METHOD`).
diff --git a/website/content/v0.6/_index.md b/website/content/v0.6/_index.md
new file mode 100644
index 000000000..591438dcd
--- /dev/null
+++ b/website/content/v0.6/_index.md
@@ -0,0 +1,27 @@
+---
+title: "Welcome"
+no_list: true
+linkTitle: "Documentation"
+cascade:
+  type: docs
+preRelease: true
+---
+
+Welcome to the Sidero documentation.
+
+## Community
+
+- Slack: Join our [slack channel](https://slack.dev.talos-systems.io)
+- Forum: [community](https://groups.google.com/a/SideroLabs.com/forum/#!forum/community)
+- Twitter: [@SideroLabs](https://twitter.com/SideroLabs)
+- Email: [info@SideroLabs.com](mailto:info@SideroLabs.com)
+
+If you're interested in this project and would like to help in engineering efforts, or have general usage questions, we are happy to have you!
+We hold a weekly meeting that all audiences are welcome to attend.
+
+### Office Hours
+
+- When: Mondays at 16:30 UTC.
+- Where: [Google Meet](https://meet.google.com/day-pxhv-zky).
+
+You can subscribe to this meeting by joining the community forum above.
diff --git a/website/themes/docsy/layouts/partials/version-banner.html b/website/themes/docsy/layouts/partials/version-banner.html index 6908debe7..ff87881e0 100644 --- a/website/themes/docsy/layouts/partials/version-banner.html +++ b/website/themes/docsy/layouts/partials/version-banner.html @@ -7,8 +7,8 @@ {{ $color := "primary" }}
{{ with $current_version }}

Version {{ trim . "/" | markdownify }} of the - documentation is for the Talos version being developed. - {{ with $latest_version }}For the latest stable version of Talos, see the + documentation is for the Sidero version being developed. + {{ with $latest_version }}For the latest stable version of Sidero, see the latest version.

{{ end }} {{ end }}