Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Set up mirroring for multiple cephblockpools #1829

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
89 changes: 55 additions & 34 deletions test/addons/rbd-mirror/start
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
# SPDX-License-Identifier: Apache-2.0

import base64
import concurrent.futures
import json
import os
import sys
Expand All @@ -18,7 +19,7 @@ from drenv import ceph
from drenv import commands
from drenv import kubectl

POOL_NAME = "replicapool"
POOL_NAMES = ["replicapool", "replicapool-2"]


def log_blocklist(cluster):
Expand All @@ -28,12 +29,12 @@ def log_blocklist(cluster):
print(yaml.dump(info, sort_keys=False))


def fetch_secret_info(cluster):
def fetch_secret_info(cluster, pool):
info = {}

print(f"Getting mirroring info site name for cluster '{cluster}'")
print(f"Getting mirroring info site name for cluster '{cluster}' and cephblockpool {pool}")
info["name"] = drenv.wait_for(
f"cephblockpools.ceph.rook.io/{POOL_NAME}",
f"cephblockpools.ceph.rook.io/{pool}",
output="jsonpath={.status.mirroringInfo.site_name}",
namespace="rook-ceph",
profile=cluster,
Expand All @@ -42,7 +43,7 @@ def fetch_secret_info(cluster):
print(f"Getting rbd mirror boostrap peer secret name for cluster '{cluster}'")
secret_name = kubectl.get(
"cephblockpools.ceph.rook.io",
POOL_NAME,
pool,
"--output=jsonpath={.status.info.rbdMirrorBootstrapPeerSecretName}",
"--namespace=rook-ceph",
context=cluster,
Expand All @@ -58,7 +59,7 @@ def fetch_secret_info(cluster):
)

# Must be encoded as base64 in secret .data section.
info["pool"] = base64.b64encode(POOL_NAME.encode()).decode()
info["pool"] = base64.b64encode(pool.encode()).decode()

return info

Expand All @@ -83,8 +84,8 @@ def disable_rbd_mirror_debug_logs(cluster):
ceph.rm_config(cluster, "mgr", "mgr/rbd_support/log_level")


def configure_rbd_mirroring(cluster, peer_info):
print(f"Applying rbd mirror secret in cluster '{cluster}'")
def configure_rbd_mirroring(cluster, pool, peer_info):
print(f"Applying rbd mirror secret in cluster '{cluster}' for cephblockpool {pool}")

template = drenv.template("start-data/rbd-mirror-secret.yaml")
yaml = template.substitute(peer_info)
Expand All @@ -96,23 +97,33 @@ def configure_rbd_mirroring(cluster, peer_info):
)

print(f"Configure peers for cluster '{cluster}'")
patch = {"spec": {"mirroring": {"peers": {"secretNames": [peer_info["name"]]}}}}
patch = {
"spec": {"mirroring": {"peers": {"secretNames": [peer_info["name"]]}}}
}
kubectl.patch(
"cephblockpool",
POOL_NAME,
pool,
"--type=merge",
f"--patch={json.dumps(patch)}",
"--namespace=rook-ceph",
context=cluster,
)

vrc_name = "vrc-sample"
vgrc_name = "vgrc-sample"
if pool != 'replicapool':
num = pool.split("-")[1]
vrc_name = f"{vrc_name}-{num}"
vgrc_name = f"{vgrc_name}-{num}"

Comment on lines +112 to +118
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Though the logic looks nice, I would not check whether the name is "replicapool" and then proceed to create the secondary vrc and vgrc. Also, the storageIDs would be the same for both the VRC and VGRC in this case, and the replicationID should also change once the storageClass is different. I have a PR open with similar changes; once that PR gets merged, you will probably need to rebase and work on top of it.

print("Creating VolumeReplicationClass")
template = drenv.template("start-data/vrc-sample.yaml")
yaml = template.substitute(cluster=cluster)
yaml = template.substitute(name=vrc_name, cluster=cluster, pool=pool)
kubectl.apply("--filename=-", input=yaml, context=cluster)

print("Creating VolumeGroupReplicationClass")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since we are planning to have multiple vrc and vgrc objects, logging their names would make it clearer and easier to debug.

template = drenv.template("start-data/vgrc-sample.yaml")
yaml = template.substitute(cluster=cluster, pool=POOL_NAME)
yaml = template.substitute(name=vgrc_name, cluster=cluster, pool=pool)
kubectl.apply("--filename=-", input=yaml, context=cluster)

print(f"Apply rbd mirror to cluster '{cluster}'")
Expand Down Expand Up @@ -144,14 +155,14 @@ def wait_until_rbd_mirror_is_ready(cluster):
print(yaml.dump(info, sort_keys=False))


def wait_until_pool_mirroring_is_healthy(cluster, attempts=3):
def wait_until_pool_mirroring_is_healthy(cluster, pool, attempts=3):
# We have random timeouts when rbd-mirror fail to connect to the peer.
# When this happens we can recover by restarting the rbd-mirror daemon.

for i in range(1, attempts + 1):
print(f"Waiting for mirroring health in cluster '{cluster}' ({i}/{attempts})")
try:
watch_pool_mirroring_status(cluster)
watch_pool_mirroring_status(cluster, pool)
except commands.Timeout:
if i == attempts:
raise
Expand All @@ -163,16 +174,16 @@ def wait_until_pool_mirroring_is_healthy(cluster, attempts=3):

status = kubectl.get(
"cephblockpools.ceph.rook.io",
POOL_NAME,
pool,
"--output=jsonpath={.status}",
"--namespace=rook-ceph",
context=cluster,
)
info = {f"Cluster '{cluster}' ceph block pool status": json.loads(status)}
info = {f"Cluster '{cluster}' cephblockpool {pool} status": json.loads(status)}
print(yaml.dump(info, sort_keys=False))


def watch_pool_mirroring_status(cluster, timeout=180):
def watch_pool_mirroring_status(cluster, pool, timeout=180):
"""
Watch pool .status.mirroringStatus.summary until mirroring is healthy or
the timeout expires.
Expand All @@ -183,7 +194,7 @@ def watch_pool_mirroring_status(cluster, timeout=180):
while True:
remaining = deadline - time.monotonic()
watcher = kubectl.watch(
f"cephblockpool/{POOL_NAME}",
f"cephblockpool/{pool}",
jsonpath="{.status.mirroringStatus.summary}",
namespace="rook-ceph",
timeout=remaining,
Expand Down Expand Up @@ -227,6 +238,26 @@ def restart_rbd_mirror_daemon(cluster):
)


def test(cluster1, cluster2, pool):
    """
    Set up and verify bidirectional rbd mirroring for one cephblockpool.

    Exchanges the bootstrap peer secret info between the two clusters,
    configures mirroring in both directions, and waits until the pool
    reports healthy mirroring status on both sides.

    Parameters:
        cluster1, cluster2: cluster context names.
        pool: cephblockpool name to mirror.
    """
    cluster1_info = fetch_secret_info(cluster1, pool)
    cluster2_info = fetch_secret_info(cluster2, pool)

    # Each cluster consumes the *peer* cluster's bootstrap secret.
    # Include the pool name so logs from concurrent per-pool runs are
    # unambiguous.
    print(f"Setting up mirroring for '{pool}' from '{cluster2}' to '{cluster1}'")
    configure_rbd_mirroring(cluster1, pool, cluster2_info)

    print(f"Setting up mirroring for '{pool}' from '{cluster1}' to '{cluster2}'")
    configure_rbd_mirroring(cluster2, pool, cluster1_info)

    wait_until_rbd_mirror_is_ready(cluster1)
    wait_until_rbd_mirror_is_ready(cluster2)

    log_blocklist(cluster1)
    log_blocklist(cluster2)

    wait_until_pool_mirroring_is_healthy(cluster1, pool)
    wait_until_pool_mirroring_is_healthy(cluster2, pool)


if len(sys.argv) != 3:
print(f"Usage: {sys.argv[0]} cluster1 cluster2")
sys.exit(1)
Expand All @@ -243,22 +274,12 @@ else:
disable_rbd_mirror_debug_logs(cluster1)
disable_rbd_mirror_debug_logs(cluster2)

cluster1_info = fetch_secret_info(cluster1)
cluster2_info = fetch_secret_info(cluster2)

print(f"Setting up mirroring from '{cluster2}' to '{cluster1}'")
configure_rbd_mirroring(cluster1, cluster2_info)

print(f"Setting up mirroring from '{cluster1}' to '{cluster2}'")
configure_rbd_mirroring(cluster2, cluster1_info)

wait_until_rbd_mirror_is_ready(cluster1)
wait_until_rbd_mirror_is_ready(cluster2)

log_blocklist(cluster1)
log_blocklist(cluster2)
with concurrent.futures.ThreadPoolExecutor() as e:
tests = []
for pool in POOL_NAMES:
tests.append(e.submit(test, cluster1, cluster2, pool))

wait_until_pool_mirroring_is_healthy(cluster1)
wait_until_pool_mirroring_is_healthy(cluster2)
for t in concurrent.futures.as_completed(tests):
t.result()

print("Mirroring was setup successfully")
2 changes: 1 addition & 1 deletion test/addons/rbd-mirror/start-data/vgrc-sample.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
apiVersion: replication.storage.openshift.io/v1alpha1
kind: VolumeGroupReplicationClass
metadata:
name: vgrc-sample
name: $name
labels:
ramendr.openshift.io/storageid: rook-ceph-$cluster-1
ramendr.openshift.io/replicationid: rook-ceph-replication-1
Expand Down
4 changes: 3 additions & 1 deletion test/addons/rbd-mirror/start-data/vrc-sample.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,15 @@
apiVersion: replication.storage.openshift.io/v1alpha1
kind: VolumeReplicationClass
metadata:
name: vrc-sample
name: $name
labels:
ramendr.openshift.io/storageid: rook-ceph-$cluster-1
ramendr.openshift.io/replicationid: rook-ceph-replication-1
spec:
provisioner: rook-ceph.rbd.csi.ceph.com
parameters:
clusterID: rook-ceph
pool: $pool
replication.storage.openshift.io/replication-secret-name: rook-csi-rbd-provisioner
replication.storage.openshift.io/replication-secret-namespace: rook-ceph
schedulingInterval: 1m
66 changes: 34 additions & 32 deletions test/addons/rook-pool/start
Original file line number Diff line number Diff line change
Expand Up @@ -43,38 +43,40 @@ def deploy(cluster):

def wait(cluster):
    """
    Wait until every ceph block pool in POOL_NAMES is ready and has its
    mirroring bootstrap peer token, then log each pool's status.

    Parameters:
        cluster: cluster context/profile name.
    """
    for pool in POOL_NAMES:
        # Log the pool name so multi-pool runs are easy to follow.
        print(f"Waiting until ceph block pool '{pool}' is ready")
        drenv.wait_for(
            f"cephblockpool/{pool}",
            output="jsonpath={.status.phase}",
            namespace="rook-ceph",
            timeout=120,
            profile=cluster,
        )
        kubectl.wait(
            f"cephblockpool/{pool}",
            "--for=jsonpath={.status.phase}=Ready",
            "--namespace=rook-ceph",
            "--timeout=300s",
            context=cluster,
        )

        print(f"Waiting for pool '{pool}' peer token")
        kubectl.wait(
            f"cephblockpool/{pool}",
            "--for=jsonpath={.status.info.rbdMirrorBootstrapPeerSecretName}"
            f"=pool-peer-token-{pool}",
            "--namespace=rook-ceph",
            "--timeout=300s",
            context=cluster,
        )

        out = kubectl.get(
            f"cephblockpool/{pool}",
            "--output=jsonpath={.status}",
            "--namespace=rook-ceph",
            context=cluster,
        )
        # Keyed by pool name so consecutive status dumps are distinguishable.
        info = {f"ceph pool '{pool}' status": json.loads(out)}
        print(yaml.dump(info, sort_keys=False))


if len(sys.argv) != 2:
Expand Down