Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DAOS-16265 test: Split erasurecode/multiple_failure.py #15355

Merged
merged 1 commit into from
Oct 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 51 additions & 0 deletions src/tests/ftest/erasurecode/multiple_rank_failure.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
'''
(C) Copyright 2021-2024 Intel Corporation.

SPDX-License-Identifier: BSD-2-Clause-Patent
'''
from ec_utils import ErasureCodeIor


class EcodOnlineMultiRankFail(ErasureCodeIor):
    # pylint: disable=too-many-ancestors
    """Verify EC object data after multiple ranks are killed while IOR write is in progress.

    :avocado: recursive
    """

    def __init__(self, *args, **kwargs):
        """Initialize an EcodOnlineMultiRankFail object."""
        super().__init__(*args, **kwargs)
        # Start with online rebuild enabled. NOTE(review): this flag is presumably read by
        # the ErasureCodeIor helpers during the write phase - confirm in ec_utils.
        self.set_online_rebuild = True

    def test_ec_multiple_rank_failure(self):
        """Jira ID: DAOS-7344.

        Test Description:
            Test erasure code objects with IOR while multiple ranks fail.

        Use Case:
            Create the pool, run IOR with the supported EC object type classes, kill
            multiple server ranks while the IOR write phase is in progress and verify that
            all IOR writes finish. Then read the data back and verify it.

        :avocado: tags=all,full_regression
        :avocado: tags=hw,large
        :avocado: tags=ec,ec_online_rebuild,rebuild,ec_fault,ec_multiple_failure
        :avocado: tags=EcodOnlineMultiRankFail,test_ec_multiple_rank_failure
        """
        # Two ranks are selected: server_count - 1 and server_count - 3.
        self.rank_to_kill = [self.server_count - offset for offset in (1, 3)]

        # Write phase: the IOR datasets are written for the configured EC object classes
        # with the selected ranks marked to be killed.
        write_step = (
            f"Write datasets using IOR and kill rank {self.rank_to_kill} while IOR is running")
        self.log_step(write_step)
        self.ior_write_dataset()

        # Online rebuild is switched off before the verification pass.
        self.set_online_rebuild = False

        # Read phase: the data is read back with parity=2; per the test intent the data
        # should be intact, with no corruption, after the ranks were killed.
        read_step = f"Read datasets using IOR after killing rank {self.rank_to_kill}"
        self.log_step(read_step)
        self.ior_read_dataset(parity=2)
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
hosts:
test_servers: 6
test_clients: 2

timeout: 2000

setup:
start_agents_once: false
start_servers_once: false

server_config:
name: daos_server
engines_per_host: 2
Expand All @@ -23,13 +26,16 @@ server_config:
fabric_iface_port: 31417
log_file: daos_server1.log
storage: auto

pool:
size: 93%
set_logmasks: False

container:
type: POSIX
control_method: daos
properties: cksum:crc16,cksum_size:16384,srv_cksum:on

ior:
api: "DFS"
client_processes:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from ec_utils import ErasureCodeIor


class EcodOnlineMultFail(ErasureCodeIor):
class EcodOnlineMultiTargetFail(ErasureCodeIor):
# pylint: disable=too-many-ancestors
"""
Test Class Description: To validate Erasure code object data after killing multiple rank,targets
Expand All @@ -23,6 +23,8 @@ def run_ior_cascade_failure(self):
"""Common function to Write and Read IOR"""
# Write IOR data set with different EC object. kill rank, targets or mix of both while IOR
# Write phase is in progress.
self.log_step(
f"Write datasets using IOR and exclude target {self.pool_exclude} while IOR is running")
self.ior_write_dataset()

# Disabled Online rebuild
Expand All @@ -31,25 +33,9 @@ def run_ior_cascade_failure(self):
# Read IOR data and verify for EC object again
# EC data was written with +2 parity so after killing ranks of targets data should be
# intact and no data corruption observed.
self.log_step(f"Read datasets using IOR after exclude target {self.pool_exclude}")
self.ior_read_dataset(parity=2)

def test_ec_multiple_rank_failure(self):
"""Jira ID: DAOS-7344.

Test Description: Test Erasure code object with IOR with multiple rank failure
Use Case: Create the pool, run IOR with supported EC object type class, kill multiple
server ranks, while IOR Write phase is in progress, verify all IOR write
finish.Read and verify data.

:avocado: tags=all,full_regression
:avocado: tags=hw,large
:avocado: tags=ec,ec_online_rebuild,rebuild,ec_fault,ec_multiple_failure
:avocado: tags=EcodOnlineMultFail,test_ec_multiple_rank_failure
"""
# Kill Two server ranks
self.rank_to_kill = [self.server_count - 1, self.server_count - 3]
self.run_ior_cascade_failure()

def test_ec_multiple_targets_on_same_rank(self):
"""Jira ID: DAOS-7344.

Expand All @@ -61,7 +47,7 @@ def test_ec_multiple_targets_on_same_rank(self):
:avocado: tags=all,full_regression
:avocado: tags=hw,large
:avocado: tags=ec,ec_array,ec_online_rebuild,rebuild,ec_fault,ec_multiple_failure
:avocado: tags=EcodOnlineMultFail,test_ec_multiple_targets_on_same_rank
:avocado: tags=EcodOnlineMultiTargetFail,test_ec_multiple_targets_on_same_rank
"""
# Kill Two targets 2,4 from same rank 2
self.pool_exclude[2] = "2,4"
Expand All @@ -78,7 +64,7 @@ def test_ec_multiple_targets_on_diff_ranks(self):
:avocado: tags=all,full_regression
:avocado: tags=hw,large
:avocado: tags=ec,ec_array,ec_online_rebuild,rebuild,ec_fault,ec_multiple_failure
:avocado: tags=EcodOnlineMultFail,test_ec_multiple_targets_on_diff_ranks
:avocado: tags=EcodOnlineMultiTargetFail,test_ec_multiple_targets_on_diff_ranks
"""
# Kill Two targets from different ranks
self.pool_exclude[2] = "2"
Expand All @@ -96,7 +82,7 @@ def test_ec_single_target_rank_failure(self):
:avocado: tags=all,full_regression
:avocado: tags=hw,large
:avocado: tags=ec,ec_online_rebuild,rebuild,ec_fault,ec_multiple_failure
:avocado: tags=EcodOnlineMultFail,test_ec_single_target_rank_failure
:avocado: tags=EcodOnlineMultiTargetFail,test_ec_single_target_rank_failure
"""
# Kill One server rank
self.rank_to_kill = [self.server_count - 1]
Expand Down
65 changes: 65 additions & 0 deletions src/tests/ftest/erasurecode/multiple_target_failure.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
hosts:
test_servers: 6
test_clients: 2

timeout: 2000

setup:
start_agents_once: false
start_servers_once: false

server_config:
name: daos_server
engines_per_host: 2
engines:
0:
pinned_numa_node: 0
nr_xs_helpers: 1
fabric_iface: ib0
fabric_iface_port: 31317
log_file: daos_server0.log
storage: auto
1:
pinned_numa_node: 1
nr_xs_helpers: 1
fabric_iface: ib1
fabric_iface_port: 31417
log_file: daos_server1.log
storage: auto

pool:
size: 93%
set_logmasks: False

container:
type: POSIX
control_method: daos
properties: cksum:crc16,cksum_size:16384,srv_cksum:on

ior:
api: "DFS"
client_processes:
np: 32
dfs_destroy: false
iorflags:
flags: "-w -W -F -k -G 1 -vv"
read_flags: "-r -R -F -k -G 1 -vv"
test_file: /testFile
repetitions: 1
sizes: !mux
Full_Striped:
chunk_block_transfer_sizes:
- [32M, 8G, 8M]
Partial_Striped:
chunk_block_transfer_sizes:
- [32M, 512M, 2K]
objectclass: !mux
EC_2P2GX:
dfs_oclass_list:
- ["EC_2P2GX", 6]
EC_4P2GX:
dfs_oclass_list:
- ["EC_4P2GX", 8]
EC_8P2GX:
dfs_oclass_list:
- ["EC_8P2GX", 10]
Loading