Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DAOS-15960 tests: Improvements for io_sys_admin test #14503

Merged
merged 9 commits into from
Aug 8, 2024
10 changes: 7 additions & 3 deletions src/tests/ftest/deployment/io_sys_admin.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -66,13 +66,17 @@ largefilecount:
- HDF5-VOL
- DFS
object_class:
- SX
- EC_2P1GX
ior_oclass: # Run once with SX and then with EC_2P1GX
- SX
- EC_2P1GX
mdtest_oclass: # Run once with S1 and then with EC_2P1G1
- S1
- EC_2P1G1
ior:
client_processes:
ppn: 30
dfs_destroy: False
flags: "-v -D 300 -W -w -r -R -F -k"
flags: "-v -D 300 -W -w -k"
test_file: daos:testFile
repetitions: 1
signature: 123
Expand Down
10 changes: 7 additions & 3 deletions src/tests/ftest/io/large_file_count.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,16 @@ largefilecount:
- DFS
- POSIX
object_class:
- SX
- EC_2P1GX
ior_oclass: # Run once with SX and then with EC_2P1GX
- SX
- EC_2P1GX
mdtest_oclass: # Run once with S1 and then with EC_2P1G1
- S1
- EC_2P1G1
ior:
np: 30
dfs_destroy: false
flags: "-v -D 300 -W -w -r -R -F"
flags: "-v -D 300 -W -w -r -R"
test_file: /testFile
repetitions: 1
transfer_size: '1M'
Expand Down
10 changes: 7 additions & 3 deletions src/tests/ftest/io/small_file_count.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -31,12 +31,16 @@ largefilecount:
- DFS
- POSIX
object_class:
- SX
- EC_2P1GX
ior_oclass: # Run once with SX and then with EC_2P1GX
- SX
- EC_2P1GX
mdtest_oclass: # Run once with S1 and then with EC_2P1G1
- S1
- EC_2P1G1
ior:
np: 30
dfs_destroy: false
flags: "-v -D 300 -W -w -r -R -F"
flags: "-v -D 300 -W -w -r -R"
test_file: /testFile
repetitions: 1
transfer_size: '1M'
Expand Down
18 changes: 17 additions & 1 deletion src/tests/ftest/util/data_mover_test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1026,7 +1026,23 @@ def run_dm_activities_with_ior(self, tool, pool, cont, create_dataset=False):
daos_path = os.path.join(os.sep, test_file)
else:
self.fail("Invalid tool: {}".format(tool))

# Original flags used for write
flags = self.ior_cmd.flags.value

# Remove read and write from flags if present
flags = re.sub(" *-r", "", flags)
flags = re.sub(" *-R", "", flags)
flags = re.sub(" *-w", "", flags)
flags = re.sub(" *-W", "", flags)

# Remove stonewall
flags = re.sub(" *-D [0-9]+", "", flags)

# Add read flags
flags += " -r -R"

# update ior params, read back and verify data from cont3
self.run_ior_with_params(
"DAOS", daos_path, read_back_pool, read_back_cont,
flags="-r -R -F -k")
flags=flags)
84 changes: 56 additions & 28 deletions src/tests/ftest/util/file_count_test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@

SPDX-License-Identifier: BSD-2-Clause-Patent
"""
import os

from avocado.core.exceptions import TestFail
from ior_test_base import IorTestBase
from mdtest_test_base import MdtestBase
Expand All @@ -15,77 +17,101 @@ class FileCountTestBase(IorTestBase, MdtestBase):
:avocado: recursive
"""

def add_containers(self, oclass=None):
def add_containers(self, file_oclass=None, dir_oclass=None):
"""Create a list of containers that the various jobs use for storage.

Args:
oclass: object class of container
file_oclass (str, optional): file object class of container.
Defaults to None.
dir_oclass (str, optional): dir object class of container.
Defaults to None.


"""
# Create a container and add it to the overall list of containers
container = self.get_container(self.pool, create=False)
# don't include oclass in daos cont cmd; include rd_fac based on the class
if oclass:
if file_oclass:
properties = container.properties.value
container.oclass.update(oclass)
redundancy_factor = extract_redundancy_factor(oclass)
container.file_oclass.update(file_oclass)
if dir_oclass:
container.dir_oclass.update(dir_oclass)
redundancy_factor = extract_redundancy_factor(file_oclass)
rd_fac = 'rd_fac:{}'.format(str(redundancy_factor))
properties = (",").join(filter(None, [properties, rd_fac]))
container.properties.update(properties)
container.create()

return container

def get_diroclass(self, rd_fac):
    """Get the directory object class matching a redundancy factor.

    Args:
        rd_fac (int): redundancy factor

    Returns:
        str: value for dir_oclass
    """
    # Directories get a replicated (RP_*) class sized to the redundancy
    # factor, or SX when no redundancy is requested.
    if rd_fac >= 2:
        dir_oclass = "RP_3GX"
    elif rd_fac == 1:
        dir_oclass = "RP_2GX"
    else:
        dir_oclass = "SX"

    return dir_oclass

def run_file_count(self):
"""Run the file count test."""
saved_containers = []
results = []
dir_oclass = None
apis = self.params.get("api", "/run/largefilecount/*")
object_class = self.params.get("object_class", '/run/largefilecount/*')
hdf5_plugin_path = self.params.get("plugin_path", '/run/hdf5_vol/*')
mount_dir = self.params.get("mount_dir", "/run/dfuse/*")
ior_np = self.params.get("np", '/run/ior/client_processes/*', 1)
ior_ppn = self.params.get("ppn", '/run/ior/client_processes/*', None)
mdtest_np = self.params.get("np", '/run/mdtest/client_processes/*', 1)
mdtest_ppn = self.params.get("ppn", '/run/mdtest/client_processes/*', None)
intercept = os.path.join(self.prefix, 'lib64', 'libpil4dfs.so')
ior_oclass = self.params.get("ior_oclass", '/run/largefilecount/object_class/*')
mdtest_oclass = self.params.get("mdtest_oclass", '/run/largefilecount/object_class/*')

# create pool
self.add_pool(connect=False)

for oclass in object_class:
self.ior_cmd.dfs_oclass.update(oclass)
self.mdtest_cmd.dfs_oclass.update(oclass)
self.ior_cmd.dfs_dir_oclass.update(oclass)
# oclass_dir can not be EC must be RP based on rd_fac
rd_fac = extract_redundancy_factor(oclass)
if rd_fac >= 2:
self.mdtest_cmd.dfs_dir_oclass.update("RP_3G1")
elif rd_fac == 1:
self.mdtest_cmd.dfs_dir_oclass.update("RP_2G1")
else:
self.mdtest_cmd.dfs_dir_oclass.update("SX")
for api in apis:
self.ior_cmd.api.update(api)
self.mdtest_cmd.api.update(api)
# update test_dir for mdtest if api is DFS
if api == "DFS":
self.mdtest_cmd.test_dir.update("/")
# run mdtest
if self.mdtest_cmd.api.value in ['DFS', 'POSIX']:
for api in apis:
self.ior_cmd.api.update(api)
self.mdtest_cmd.api.update(api)
if api == "DFS":
self.mdtest_cmd.test_dir.update("/")
if self.mdtest_cmd.api.value in ['DFS', 'POSIX']:
for oclass in mdtest_oclass:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe it would be better if the config did this?

    mdtest_oclass:
      # file, dir
      - [S1, SX]
      - [EC_2P1G1, RP_2GX]

And then the code could do

for file_oclass, dir_oclass in mdtest_oclass:

Which would

  1. Allow the dir oclass to be configurable without modifying the code
  2. Make it easier to understand what is being run when looking at the config. Right now you have to dig into the code because it's hardcoded

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As we discussed, let's ignore this for now since you will be OOO soon

self.log.info("=======>>>Starting MDTEST with %s and %s", api, oclass)
self.container = self.add_containers(oclass)
self.mdtest_cmd.dfs_oclass.update(oclass)
rd_fac = extract_redundancy_factor(oclass)
dir_oclass = self.get_diroclass(rd_fac)
self.mdtest_cmd.dfs_dir_oclass.update(dir_oclass)
self.container = self.add_containers(oclass, dir_oclass)
try:
self.processes = mdtest_np
self.ppn = mdtest_ppn
self.execute_mdtest()
if self.mdtest_cmd.api.value == 'POSIX':
self.mdtest_cmd.env.update(LD_PRELOAD=intercept, D_IL_REPORT='1')
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is going to force all tests using this test base to use interception. Is that really what we want?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah that's what we want as of now.

self.execute_mdtest()
else:
self.execute_mdtest()
results.append(["PASS", str(self.mdtest_cmd)])
except TestFail:
results.append(["FAIL", str(self.mdtest_cmd)])
# save the current container; to be destroyed later
if self.container is not None:
saved_containers.append(self.container)
for oclass in ior_oclass:
# run ior
self.log.info("=======>>>Starting IOR with %s and %s", api, oclass)
self.ior_cmd.dfs_oclass.update(oclass)
self.container = self.add_containers(oclass)
self.update_ior_cmd_with_pool(False)
try:
Expand All @@ -95,6 +121,8 @@ def run_file_count(self):
self.ior_cmd.api.update('HDF5')
self.run_ior_with_pool(
create_pool=False, plugin_path=hdf5_plugin_path, mount_dir=mount_dir)
elif self.ior_cmd.api.value == 'POSIX':
self.run_ior_with_pool(create_pool=False, intercept=intercept)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Similar - this is going to force all tests using this test base to use interception

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah that's what we want as of now.

else:
self.run_ior_with_pool(create_pool=False)
results.append(["PASS", str(self.ior_cmd)])
Expand Down
Loading