diff --git a/buildchain/buildchain/salt_tree.py b/buildchain/buildchain/salt_tree.py index 356edc18eb..9bc1477e3d 100644 --- a/buildchain/buildchain/salt_tree.py +++ b/buildchain/buildchain/salt_tree.py @@ -522,6 +522,8 @@ def _get_parts(self) -> Iterator[str]: Path("salt/metalk8s/orchestrate/deploy_node.sls"), Path("salt/metalk8s/orchestrate/etcd.sls"), Path("salt/metalk8s/orchestrate/register_etcd.sls"), + Path("salt/metalk8s/orchestrate/backup/files/job.yaml.j2"), + Path("salt/metalk8s/orchestrate/backup/replication.sls"), Path("salt/metalk8s/orchestrate/bootstrap/init.sls"), Path("salt/metalk8s/orchestrate/bootstrap/accept-minion.sls"), Path("salt/metalk8s/orchestrate/bootstrap/pre-downgrade.sls"), diff --git a/docs/operation/disaster_recovery/bootstrap_backup_restore.rst b/docs/operation/disaster_recovery/bootstrap_backup_restore.rst index 76a42f55f6..88475e5e7e 100644 --- a/docs/operation/disaster_recovery/bootstrap_backup_restore.rst +++ b/docs/operation/disaster_recovery/bootstrap_backup_restore.rst @@ -25,7 +25,7 @@ To create a new backup file, run the following command: /srv/scality/metalk8s-|version|/backup.sh -Backup archives are stored in /var/lib/metalk8s/backups. +Backup archives are stored in /var/lib/metalk8s/backups on all master nodes. 
def backup_node():
    """Generate a MetalK8s backup archive by running ``backup.sh``.

    The cluster version is read from the pillar
    (``metalk8s:cluster_version``), the matching ``metalk8s-<version>``
    entry is looked up in the result of ``get_archives()``, and the
    ``backup.sh`` script located under that archive's path is executed
    through ``cmd.run_all``.

    Returns:
        str: A confirmation message once the script exited with code 0.

    Raises:
        CommandExecutionError: If no archive is known for the cluster
            version, or if ``backup.sh`` exits with a non-zero return code
            (the message then carries the script's stderr, falling back to
            its stdout when stderr is empty).
    """
    metalk8s_version = __pillar__["metalk8s"]["cluster_version"]
    archives = get_archives()

    try:
        archive_path = archives[f"metalk8s-{metalk8s_version}"]["path"]
    except KeyError as exc:
        # Interpolate the version so the error tells the operator which
        # archive is actually missing.
        raise CommandExecutionError(
            f"No MetalK8s archive found for version {metalk8s_version}"
        ) from exc

    backup_script = f"{archive_path}/backup.sh"
    result = __salt__["cmd.run_all"](cmd=backup_script)
    log.debug("Result: %r", result)

    if result["retcode"] != 0:
        # Prefer stderr for the failure reason; some failures only write
        # to stdout, hence the fallback.
        output = result.get("stderr") or result["stdout"]
        raise CommandExecutionError(f"Failed to run {backup_script}: {output}")

    msg = "Backup successfully generated"
    log.info(msg)
    return msg
# Job mirroring the backup archives onto one node.
# Rendered with two template variables:
#   node  - name of the node the Pod must run on (also suffixes the Job name)
#   image - container image providing `wget`
apiVersion: batch/v1
kind: Job
metadata:
  name: backup-replication-{{ node }}
  namespace: kube-system
  labels:
    app.kubernetes.io/name: backup-replication
    app.kubernetes.io/part-of: metalk8s
    app.kubernetes.io/managed-by: salt
spec:
  backoffLimit: 4
  parallelism: 1
  # Let Kubernetes garbage-collect the finished Job after two minutes.
  ttlSecondsAfterFinished: 120
  template:
    metadata:
      labels:
        app.kubernetes.io/name: backup-replication
        app.kubernetes.io/part-of: metalk8s
        app.kubernetes.io/managed-by: salt
    spec:
      # Pin the Pod to the target node: archives land in a hostPath volume.
      nodeName: {{ node }}
      containers:
        - name: backup-replication
          image: {{ image }}
          command:
            - wget
            # `command` is exec-form (no shell), so every item reaches wget
            # verbatim: the pattern must not carry shell-style quotes, or
            # they become part of the pattern and no archive matches.
            # The YAML quotes below only delimit the scalar.
            - "--accept=*.tar.gz"
            - --no-host-directories
            - --mirror
            - --ca-certificate=/certificate/ca.crt
            # $(VAR) references are expanded by Kubernetes from `env` below.
            - --user=$(BACKUP_USERNAME)
            - --password=$(BACKUP_PASSWORD)
            - https://backup/
          env:
            - name: BACKUP_USERNAME
              valueFrom:
                secretKeyRef:
                  name: backup-credentials
                  key: username
            - name: BACKUP_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: backup-credentials
                  key: password
          volumeMounts:
            - name: backups
              mountPath: /backups
            - name: ca-cert
              mountPath: /certificate
          # wget writes relative to its working directory, i.e. the
          # hostPath-backed backups volume.
          workingDir: /backups
      restartPolicy: Never
      volumes:
        - name: backups
          hostPath:
            path: /var/lib/metalk8s/backups
            type: DirectoryOrCreate
        - name: ca-cert
          configMap:
            name: backup-ca-cert
@parameterized.expand(
    [
        (False, "Backup successfully generated"),
        (True, "Failed to run .*/backup.sh: Boom!"),
    ]
)
def test_backup_node(self, raises, result):
    """Check `backup_node` for a succeeding and a failing backup script.

    `raises` selects whether the faked `cmd.run_all` reports a non-zero
    return code; `result` is the expected return value on success, or the
    regular expression the raised error message must match on failure.
    """

    def _fake_cmd_run_all(cmd):
        # Same stdout/stderr in both cases; only the return code differs.
        return {
            "retcode": 1 if raises else 0,
            "stdout": "OK",
            "stderr": "Boom!",
        }

    mocked_salt = {"cmd.run_all": MagicMock(side_effect=_fake_cmd_run_all)}
    mocked_pillar = {"metalk8s": {"cluster_version": "2.10.0"}}

    salt_patch = patch.dict(metalk8s.__salt__, mocked_salt)
    pillar_patch = patch.dict(metalk8s.__pillar__, mocked_pillar)

    with salt_patch, pillar_patch:
        if not raises:
            self.assertEqual(metalk8s.backup_node(), result)
            return

        with self.assertRaisesRegex(CommandExecutionError, result):
            metalk8s.backup_node()
# Trigger the Salt orchestration replicating the backup archives.
#
# Runs the `metalk8s.orchestrate.backup.replication` orchestrate with
# `salt-run`, executed through `crictl exec` inside the container returned
# by `get_salt_container`.
replicate_archives() {
    # Keep the command prefix local so it does not leak into the script's
    # global scope.
    local -a salt_master_exec
    salt_master_exec=(crictl exec -i "$(get_salt_container)")

    "${salt_master_exec[@]}" salt-run --state-output=mixed state.orchestrate \
        metalk8s.orchestrate.backup.replication \
        saltenv=metalk8s-@@VERSION
}

run "Backing up MetalK8s configurations" backup_metalk8s_conf
run "Backing up CAs certificates and keys" backup_cas
run "Backing up etcd data" backup_etcd
run "Creating backup archive '$BACKUP_ARCHIVE'" create_archive

# Use a full `if` instead of `(( REPLICATION )) && run ...`: this is the last
# command of the script, so with --no-replication the failing arithmetic test
# would become the script's exit status (1) even though the backup succeeded,
# breaking callers that check it.
if (( REPLICATION )); then
    run "Replicating backup archives on master nodes" replicate_archives
fi
@@ patch_kubesystem_namespace() { run "Performing Pre-Upgrade checks" precheck_upgrade [ $DRY_RUN -eq 1 ] && exit 0 -"$BASE_DIR"/backup.sh +"$BASE_DIR"/backup.sh --no-replication run "Upgrading bootstrap" upgrade_bootstrap run "Setting cluster version to $DESTINATION_VERSION" patch_kubesystem_namespace