Skip to content

Commit

Permalink
salt: Schedule daily backup on bootstrap node
Browse files Browse the repository at this point in the history
  • Loading branch information
alexandre-allard committed Oct 12, 2021
1 parent 2d87ae6 commit e9e78c5
Show file tree
Hide file tree
Showing 15 changed files with 204 additions and 70 deletions.
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
# CHANGELOG
## Release 2.10.4 (in development)

### Features Added

- A daily backup of the bootstrap node is now automatically scheduled.
All the backups are also replicated onto all the master nodes.
(PR [#3557](https://github.com/scality/metalk8s/pull/3557))

## Release 2.10.3
### Enhancements

Expand Down
2 changes: 2 additions & 0 deletions buildchain/buildchain/salt_tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -522,6 +522,8 @@ def _get_parts(self) -> Iterator[str]:
Path("salt/metalk8s/orchestrate/deploy_node.sls"),
Path("salt/metalk8s/orchestrate/etcd.sls"),
Path("salt/metalk8s/orchestrate/register_etcd.sls"),
Path("salt/metalk8s/orchestrate/backup/files/job.yaml.j2"),
Path("salt/metalk8s/orchestrate/backup/replication.sls"),
Path("salt/metalk8s/orchestrate/bootstrap/init.sls"),
Path("salt/metalk8s/orchestrate/bootstrap/accept-minion.sls"),
Path("salt/metalk8s/orchestrate/bootstrap/pre-downgrade.sls"),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ To create a new backup file, run the following command:
/srv/scality/metalk8s-|version|/backup.sh
Backup archives are stored in /var/lib/metalk8s/backups.
Backup archives are stored in /var/lib/metalk8s/backups on all master nodes.

Restoring a Bootstrap Node
**************************
Expand Down
24 changes: 24 additions & 0 deletions salt/_modules/metalk8s.py
Original file line number Diff line number Diff line change
Expand Up @@ -691,3 +691,27 @@ def configure_archive(archive, remove=False):
msg = "Archive '{0}' {1}".format(archive, msg)
log.info(msg)
return msg


def backup_node():
    """Generate a backup by running the `backup.sh` script of the current archive.

    The script is looked up in the archive registered for the cluster version
    found in the pillar (`metalk8s:cluster_version`).

    Returns:
        str: a confirmation message when the script exits with code 0.

    Raises:
        CommandExecutionError: if no archive is known for the cluster version,
            or if the backup script exits with a non-zero return code.
    """
    version = __pillar__["metalk8s"]["cluster_version"]
    known_archives = get_archives()
    archive_name = f"metalk8s-{version}"

    try:
        archive_path = known_archives[archive_name]["path"]
    except KeyError as exc:
        raise CommandExecutionError(
            f"No MetalK8s archive found for version {version}"
        ) from exc

    script = f"{archive_path}/backup.sh"
    outcome = __salt__["cmd.run_all"](cmd=script)
    log.debug("Result: %r", outcome)

    if outcome["retcode"] == 0:
        message = "Backup successfully generated"
        log.info(message)
        return message

    # Prefer stderr for the error report, fall back on stdout when it is empty.
    details = outcome.get("stderr") or outcome["stdout"]
    raise CommandExecutionError(f"Failed to run {script}: {details}")
14 changes: 14 additions & 0 deletions salt/_utils/kubernetes_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -296,13 +296,27 @@ def __init__(self, model, api_cls, name, method_names=None):
name="api_service",
),
# }}}
# /apis/batch/v1/ {{{
("batch/v1", "Job"): KindInfo(
model=k8s_client.V1Job,
api_cls=k8s_client.BatchV1Api,
name="namespaced_job",
),
# }}}
# /apis/batch/v1beta1/ {{{
("batch/v1beta1", "CronJob"): KindInfo(
model=k8s_client.V1beta1CronJob,
api_cls=k8s_client.BatchV1beta1Api,
name="namespaced_cron_job",
),
# }}}
# /apis/networking.k8s.io/v1/ {{{
("networking.k8s.io/v1", "NetworkPolicy"): KindInfo(
model=k8s_client.V1NetworkPolicy,
api_cls=k8s_client.NetworkingV1Api,
name="namespaced_network_policy",
),
# }}}
# /apis/networking.k8s.io/v1beta1/ {{{
("networking.k8s.io/v1beta1", "Ingress"): KindInfo(
model=k8s_client.NetworkingV1beta1Ingress,
Expand Down
4 changes: 4 additions & 0 deletions salt/metalk8s/backup/configured.sls
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Register a Salt schedule entry calling `metalk8s.backup_node` every
# 86400 seconds, i.e. once a day.
Schedule daily backup:
schedule.present:
- function: metalk8s.backup_node
- seconds: 86400
59 changes: 59 additions & 0 deletions salt/metalk8s/orchestrate/backup/files/job.yaml.j2
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# Job mirroring the backup archives served at https://backup/ into
# /var/lib/metalk8s/backups on a given node.
#
# Template context:
#   node  - name of the node the Job is pinned to (also used in the Job name)
#   image - container image providing `wget`
apiVersion: batch/v1
kind: Job
metadata:
  name: backup-replication-{{ node }}
  namespace: kube-system
  labels:
    app.kubernetes.io/name: backup-replication
    app.kubernetes.io/part-of: metalk8s
    app.kubernetes.io/managed-by: salt
spec:
  backoffLimit: 4
  parallelism: 1
  ttlSecondsAfterFinished: 120
  template:
    metadata:
      labels:
        app.kubernetes.io/name: backup-replication
        app.kubernetes.io/part-of: metalk8s
        app.kubernetes.io/managed-by: salt
    spec:
      # Pin the Pod on the target node so the hostPath below is the node's own
      # backup directory.
      nodeName: {{ node }}
      containers:
        - name: backup-replication
          image: {{ image }}
          command:
            - wget
            # The command is executed without a shell, so nothing would strip
            # quotes placed around the pattern: they would become part of the
            # accept pattern and no archive would match. Quote the whole YAML
            # scalar instead.
            - "--accept=*.tar.gz"
            - --no-host-directories
            - --mirror
            - --ca-certificate=/certificate/ca.crt
            # $(VAR) references are expanded by Kubernetes from the container
            # environment defined below.
            - --user=$(BACKUP_USERNAME)
            - --password=$(BACKUP_PASSWORD)
            - https://backup/
          env:
            - name: BACKUP_USERNAME
              valueFrom:
                secretKeyRef:
                  name: backup-credentials
                  key: username
            - name: BACKUP_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: backup-credentials
                  key: password
          volumeMounts:
            - name: backups
              mountPath: /backups
            - name: ca-cert
              mountPath: /certificate
          # wget writes the mirrored files in its working directory.
          workingDir: /backups
      restartPolicy: Never
      volumes:
        - name: backups
          hostPath:
            path: /var/lib/metalk8s/backups
            type: DirectoryOrCreate
        - name: ca-cert
          configMap:
            name: backup-ca-cert
16 changes: 16 additions & 0 deletions salt/metalk8s/orchestrate/backup/replication.sls
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Orchestration creating one backup replication Job per master node (nodes
# processed in sorted order), each rendered from files/job.yaml.j2 with the
# node name and the "metalk8s-utils" image name as template context.
{%- from "metalk8s/repo/macro.sls" import build_image_name with context %}
{%- set master_nodes = salt.metalk8s.minions_by_role('master') %}
{%- set image = build_image_name("metalk8s-utils") %}
{%- for node in master_nodes | sort %}
Schedule backup replication Job on {{ node }}:
metalk8s_kubernetes.object_present:
- name: salt://{{ slspath }}/files/job.yaml.j2
- template: jinja
- defaults:
node: {{ node }}
image: {{ image }}
{%- endfor %}
1 change: 1 addition & 0 deletions salt/metalk8s/roles/bootstrap/init.sls
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@ include:
- metalk8s.kubernetes.kubelet
- metalk8s.salt.master
- metalk8s.utils
- metalk8s.backup.configured
9 changes: 9 additions & 0 deletions salt/tests/unit/formulas/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -292,6 +292,15 @@ metalk8s:
mode: master
architecture: compact

backup:
files:
job.yaml.j2:
_cases:
"Create job manifest for a node":
extra_context:
node: master-1
image: registry/some-image-name:tag

register_etcd.sls:
_cases:
"Target a new master node": &orch_target_master_node
Expand Down
15 changes: 15 additions & 0 deletions salt/tests/unit/modules/files/test_metalk8s.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -425,3 +425,18 @@ configure_archive:
invalid_path: True
raises: True
result: Invalid archive path

backup_node:
# 0. Ok - Backup succeeds
- archives: &backup_node_archives
metalk8s-2.10.0:
path: /tmp
result: Backup successfully generated
# 1. Nok - Backup script failed
- archives: *backup_node_archives
raises: True
result: "Failed to run /tmp/backup.sh: Boom!"
# 2. Nok - No such archive for this MetalK8s version
- archives: {}
raises: True
result: No MetalK8s archive found for version 2.10.0
23 changes: 23 additions & 0 deletions salt/tests/unit/modules/test_metalk8s.py
Original file line number Diff line number Diff line change
Expand Up @@ -621,3 +621,26 @@ def test__read_bootstrap_config(self, raises=False, result=None):
metalk8s._read_bootstrap_config(),
"config",
)

@utils.parameterized_from_cases(YAML_TESTS_CASES["backup_node"])
def test_backup_node(self, result, version="2.10.0", archives=None, raises=False):
    """Check `metalk8s.backup_node` against the YAML-described cases.

    `result` is either the expected return value or, when `raises` is set,
    the pattern the raised `CommandExecutionError` message must match.
    """

    def _fake_run_all(cmd):
        # The fake backup script fails only for the cases expected to raise.
        return {
            "retcode": 1 if raises else 0,
            "stdout": "OK",
            "stderr": "Boom!",
        }

    salt_mocks = {"cmd.run_all": MagicMock(side_effect=_fake_run_all)}
    pillar_mocks = {"metalk8s": {"cluster_version": version}}
    get_archives_mock = MagicMock(return_value=archives or {})

    with patch.dict(metalk8s.__salt__, salt_mocks), patch.dict(
        metalk8s.__pillar__, pillar_mocks
    ), patch("metalk8s.get_archives", get_archives_mock):
        if not raises:
            self.assertEqual(metalk8s.backup_node(), result)
        else:
            self.assertRaisesRegex(
                CommandExecutionError,
                result,
                metalk8s.backup_node,
            )
88 changes: 21 additions & 67 deletions scripts/backup.sh.in
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@ TAR_OPTS=(
"--atime-preserve"
"--preserve-permissions"
)
BACKUP_ARCHIVE="/var/lib/metalk8s/backup_$(date -u +%Y%m%d_%H%M%S).tar.gz"
BACKUP_ARCHIVE="/var/lib/metalk8s/backups/$(date -u +%Y%m%d_%H%M%S).tar.gz"
REPLICATION=1

_usage() {
echo "$(basename "$0") [options]"
Expand All @@ -26,22 +27,26 @@ while (( "$#" )); do
case "$1" in
-v|--verbose)
VERBOSE=1
shift
;;
-l|--log-file)
LOGFILE="$2"
shift 2
shift
;;
-b|--backup-file)
BACKUP_ARCHIVE="$2"
shift 2
shift
;;
# Disable the backup replication on other master nodes
-n|--no-replication)
REPLICATION=0
;;
*) # unsupported flags
echo "Error: Unsupported flag $1" >&2
_usage
exit 1
;;
esac
shift
done

TMPFILES=$(mktemp -d)
Expand All @@ -62,69 +67,9 @@ cleanup() {

trap cleanup EXIT

# Run a step while hiding its output, printing only its name, outcome and
# duration. On failure, dump the captured output on stderr and exit 1.
#
# Arguments:
#   $1   - human-readable step name
#   $2.. - command (and its arguments) to execute
#
# Reads LOGFILE (command output is appended to it) and TMPFILES (directory
# holding the captured output of the current step).
run_quiet() {
local name=$1
shift 1

echo -n "> ${name}..."
local start
start=$(date +%s)
# Errors are tolerated while the command runs so that the failure can be
# reported below instead of aborting right away.
set +e
"$@" 2>&1 | tee -ia "${LOGFILE}" > "${TMPFILES}/out"
# NOTE(review): `$?` is the status of the whole pipeline; unless `pipefail` is
# enabled elsewhere in the script, that is the status of `tee` - confirm.
local RC=$?
set -e
local end
end=$(date +%s)

local duration=$(( end - start ))

if [ $RC -eq 0 ]; then
echo " done [${duration}s]"
else
echo " fail [${duration}s]"
cat >/dev/stderr << EOM
Failure while running step '${name}'
Command: $@
Output:
<< BEGIN >>
EOM
cat "${TMPFILES}/out" > /dev/stderr

cat >/dev/stderr << EOM
<< END >>
This script will now exit
EOM

exit 1
fi
}

# Announce a step on stdout, then run its command with its output left as is.
#
# Arguments:
#   $1   - human-readable step name
#   $2.. - command (and its arguments) to execute
run_verbose() {
    local step=$1
    shift

    echo "> ${step}..."
    "$@"
}

# Run a step through run_verbose when VERBOSE equals 1, through run_quiet
# otherwise. All the arguments are forwarded untouched.
run() {
    local runner=run_quiet
    if [ "$VERBOSE" -eq 1 ]; then
        runner=run_verbose
    fi

    "$runner" "${@}"
}

# Print the given message on stderr and return a failure status.
die() {
    echo "$@" >&2
    return 1
}
BASE_DIR=$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")
# shellcheck disable=SC1090
. "$BASE_DIR"/common.sh

_save_cp() {
local -r src="$(readlink -f "$1")"
Expand Down Expand Up @@ -225,7 +170,16 @@ EOF
tar "${TAR_OPTS[@]}" -C "$BACKUP_DIR" -cz -f "$BACKUP_ARCHIVE" ./
}

# Run the `metalk8s.orchestrate.backup.replication` orchestration with
# salt-run, inside the container returned by get_salt_container.
replicate_archives() {
    # Not declared local: the array stays visible to the rest of the script.
    salt_master_exec=(crictl exec -i "$(get_salt_container)")
    local -a orchestration=(
        salt-run --state-output=mixed state.orchestrate
        metalk8s.orchestrate.backup.replication
        saltenv=metalk8s-@@VERSION
    )

    "${salt_master_exec[@]}" "${orchestration[@]}"
}

# Backup steps, run in order; `run` aborts the script on the first failure
# when the step is executed quietly.
run "Backing up MetalK8s configurations" backup_metalk8s_conf
run "Backing up CAs certificates and keys" backup_cas
run "Backing up etcd data" backup_etcd
run "Creating backup archive '$BACKUP_ARCHIVE'" create_archive
# Use an explicit `if` rather than `(( REPLICATION )) && run ...`: being the
# final statement, a short-circuited `&&` list would give the script a
# non-zero exit status whenever replication is disabled, although the backup
# itself succeeded.
if (( REPLICATION )); then
    run "Replicating backup archives on master nodes" replicate_archives
fi
5 changes: 4 additions & 1 deletion scripts/downgrade.sh.in
Original file line number Diff line number Diff line change
Expand Up @@ -190,4 +190,7 @@ run "Downgrading bootstrap" downgrade_bootstrap
run "Launching the downgrade" launch_downgrade
run "Launching the post-downgrade" launch_post_downgrade

"$BASE_DIR"/backup.sh
# NOTE: We use the --no-replication flag since the backup-server is not
# available in the previous version of MetalK8s.
# This flag can safely be removed in MetalK8s 2.12.
"$BASE_DIR"/backup.sh --no-replication
5 changes: 4 additions & 1 deletion scripts/upgrade.sh.in
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,10 @@ patch_kubesystem_namespace() {
run "Performing Pre-Upgrade checks" precheck_upgrade
[ $DRY_RUN -eq 1 ] && exit 0

"$BASE_DIR"/backup.sh
# NOTE: We use the --no-replication flag since the backup-server is not
# available in the previous version of MetalK8s.
# This flag can safely be removed in MetalK8s 2.12.
"$BASE_DIR"/backup.sh --no-replication

run "Upgrading bootstrap" upgrade_bootstrap
run "Setting cluster version to $DESTINATION_VERSION" patch_kubesystem_namespace
Expand Down

0 comments on commit e9e78c5

Please sign in to comment.