daos-stack · daltonbohning · Oct 22, 2024 · Oct 21, 2024
@@ -0,0 +1,51 @@
+'''
+  (C) Copyright 2021-2024 Intel Corporation.
+
+  SPDX-License-Identifier: BSD-2-Clause-Patent
+'''
+from ec_utils import ErasureCodeIor
+
+
+class EcodOnlineMultiRankFail(ErasureCodeIor):
+    # pylint: disable=too-many-ancestors
+    """
+    Test Class Description: To validate Erasure code object data after killing multiple ranks
+                            while an IOR write is in progress.
+    :avocado: recursive
+    """
+
+    def __init__(self, *args, **kwargs):
+        """Initialize an EcodOnlineMultiRankFail object."""
+        super().__init__(*args, **kwargs)
+        self.set_online_rebuild = True
+
+    def test_ec_multiple_rank_failure(self):
+        """Jira ID: DAOS-7344.
+
+        Test Description: Test Erasure code object with IOR with multiple rank failure
+        Use Case: Create the pool, run IOR with supported EC object type class, kill multiple
+                  server ranks while the IOR write phase is in progress, verify all IOR writes
+                  finish. Read and verify data.
+
+        :avocado: tags=all,full_regression
+        :avocado: tags=hw,large
+        :avocado: tags=ec,ec_online_rebuild,rebuild,ec_fault,ec_multiple_failure
+        :avocado: tags=EcodOnlineMultiRankFail,test_ec_multiple_rank_failure
+        """
+        # Kill Two server ranks
+        self.rank_to_kill = [self.server_count - 1, self.server_count - 3]
+
+        # Write IOR datasets with different EC object classes. Kill the selected ranks while the
+        # IOR write phase is in progress.
+        self.log_step(
+            f"Write datasets using IOR and kill rank {self.rank_to_kill} while IOR is running")
+        self.ior_write_dataset()
+
+        # Disable online rebuild before the read phase
+        self.set_online_rebuild = False
+
+        # Read IOR data and verify for EC object again
+        # EC data was written with +2 parity, so after killing two ranks the data should be
+        # intact with no data corruption observed.
+        self.log_step(f"Read datasets using IOR after killing rank {self.rank_to_kill}")
+        self.ior_read_dataset(parity=2)
@@ -1,10 +1,13 @@
 hosts:
   test_servers: 6
   test_clients: 2
+
 timeout: 2000
+
 setup:
   start_agents_once: false
   start_servers_once: false
+
 server_config:
   name: daos_server
   engines_per_host: 2
@@ -23,13 +26,16 @@ server_config:
       fabric_iface_port: 31417
       log_file: daos_server1.log
       storage: auto
+
 pool:
   size: 93%
   set_logmasks: False
+
 container:
   type: POSIX
   control_method: daos
   properties: cksum:crc16,cksum_size:16384,srv_cksum:on
+
 ior:
   api: "DFS"
   client_processes:

@@ -6,7 +6,7 @@
 from ec_utils import ErasureCodeIor
 
 
-class EcodOnlineMultFail(ErasureCodeIor):
+class EcodOnlineMultiTargetFail(ErasureCodeIor):
     # pylint: disable=too-many-ancestors
     """
     Test Class Description: To validate Erasure code object data after killing multiple rank,targets
@@ -23,6 +23,8 @@ def run_ior_cascade_failure(self):
         """Common function to Write and Read IOR"""
         # Write IOR data set with different EC object. kill rank, targets or mix of both while IOR
         # Write phase is in progress.
+        self.log_step(
+            f"Write datasets using IOR and exclude target {self.pool_exclude} while IOR is running")
         self.ior_write_dataset()
 
         # Disabled Online rebuild
@@ -31,25 +33,9 @@ def run_ior_cascade_failure(self):
         # Read IOR data and verify for EC object again
         # EC data was written with +2 parity so after killing ranks of targets data should be
         # intact and no data corruption observed.
+        self.log_step(f"Read datasets using IOR after exclude target {self.pool_exclude}")
         self.ior_read_dataset(parity=2)
 
-    def test_ec_multiple_rank_failure(self):
-        """Jira ID: DAOS-7344.
-
-        Test Description: Test Erasure code object with IOR with multiple rank failure
-        Use Case: Create the pool, run IOR with supported EC object type class, kill multiple
-                  server ranks, while IOR Write phase is in progress, verify all IOR write
-                  finish.Read and verify data.
-
-        :avocado: tags=all,full_regression
-        :avocado: tags=hw,large
-        :avocado: tags=ec,ec_online_rebuild,rebuild,ec_fault,ec_multiple_failure
-        :avocado: tags=EcodOnlineMultFail,test_ec_multiple_rank_failure
-        """
-        # Kill Two server ranks
-        self.rank_to_kill = [self.server_count - 1, self.server_count - 3]
-        self.run_ior_cascade_failure()
-
     def test_ec_multiple_targets_on_same_rank(self):
         """Jira ID: DAOS-7344.
 
@@ -61,7 +47,7 @@ def test_ec_multiple_targets_on_same_rank(self):
         :avocado: tags=all,full_regression
         :avocado: tags=hw,large
         :avocado: tags=ec,ec_array,ec_online_rebuild,rebuild,ec_fault,ec_multiple_failure
-        :avocado: tags=EcodOnlineMultFail,test_ec_multiple_targets_on_same_rank
+        :avocado: tags=EcodOnlineMultiTargetFail,test_ec_multiple_targets_on_same_rank
         """
         # Kill Two targets 2,4 from same rank 2
         self.pool_exclude[2] = "2,4"
@@ -78,7 +64,7 @@ def test_ec_multiple_targets_on_diff_ranks(self):
         :avocado: tags=all,full_regression
         :avocado: tags=hw,large
         :avocado: tags=ec,ec_array,ec_online_rebuild,rebuild,ec_fault,ec_multiple_failure
-        :avocado: tags=EcodOnlineMultFail,test_ec_multiple_targets_on_diff_ranks
+        :avocado: tags=EcodOnlineMultiTargetFail,test_ec_multiple_targets_on_diff_ranks
         """
         # Kill Two targets from different ranks
         self.pool_exclude[2] = "2"
@@ -96,7 +82,7 @@ def test_ec_single_target_rank_failure(self):
         :avocado: tags=all,full_regression
         :avocado: tags=hw,large
         :avocado: tags=ec,ec_online_rebuild,rebuild,ec_fault,ec_multiple_failure
-        :avocado: tags=EcodOnlineMultFail,test_ec_single_target_rank_failure
+        :avocado: tags=EcodOnlineMultiTargetFail,test_ec_single_target_rank_failure
         """
         # Kill One server rank
         self.rank_to_kill = [self.server_count - 1]

@@ -0,0 +1,65 @@
+hosts:
+  test_servers: 6
+  test_clients: 2
+
+timeout: 2000
+
+setup:
+  start_agents_once: false
+  start_servers_once: false
+
+server_config:
+  name: daos_server
+  engines_per_host: 2
+  engines:
+    0:
+      pinned_numa_node: 0
+      nr_xs_helpers: 1
+      fabric_iface: ib0
+      fabric_iface_port: 31317
+      log_file: daos_server0.log
+      storage: auto
+    1:
+      pinned_numa_node: 1
+      nr_xs_helpers: 1
+      fabric_iface: ib1
+      fabric_iface_port: 31417
+      log_file: daos_server1.log
+      storage: auto
+
+pool:
+  size: 93%
+  set_logmasks: False
+
+container:
+  type: POSIX
+  control_method: daos
+  properties: cksum:crc16,cksum_size:16384,srv_cksum:on
+
+ior:
+  api: "DFS"
+  client_processes:
+    np: 32
+  dfs_destroy: false
+  iorflags:
+    flags: "-w -W -F -k -G 1 -vv"
+    read_flags: "-r -R -F -k -G 1 -vv"
+  test_file: /testFile
+  repetitions: 1
+  sizes: !mux
+    Full_Striped:
+      chunk_block_transfer_sizes:
+        - [32M, 8G, 8M]
+    Partial_Striped:
+      chunk_block_transfer_sizes:
+        - [32M, 512M, 2K]
+  objectclass: !mux
+    EC_2P2GX:
+      dfs_oclass_list:
+        - ["EC_2P2GX", 6]
+    EC_4P2GX:
+      dfs_oclass_list:
+        - ["EC_4P2GX", 8]
+    EC_8P2GX:
+      dfs_oclass_list:
+        - ["EC_8P2GX", 10]