Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/openmp #1411

Merged
merged 27 commits into from
Nov 8, 2023
Merged
Show file tree
Hide file tree
Changes from 24 commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
b961afa
Baseline additions to get OpenMP (host) enabled
maddyscientist Sep 26, 2023
3a59ef6
OMP parallelization of gauge force / path reference code
maddyscientist Sep 26, 2023
1b91ee3
OMP parallelization of host reference fat-link construction
maddyscientist Sep 26, 2023
1f514b8
Small cleanup
maddyscientist Oct 4, 2023
bc0bdf0
Use same host-constructed gauge field across different partitioning i…
maddyscientist Oct 5, 2023
a70c935
Use same host-construct gauge field across different partitioning in …
maddyscientist Oct 5, 2023
8d0c26b
Apply some OMP parallelization to staggered_host_utils and remove unn…
maddyscientist Oct 5, 2023
535f1a7
OMP parallelization to host_utils.cpp
maddyscientist Oct 5, 2023
7795879
OMP parallelization to hisq_force_reference
maddyscientist Oct 5, 2023
12b9632
OMP parallelization to staggered reference dslash
maddyscientist Oct 5, 2023
6ee50e4
Merge branch 'develop' of github.com:lattice/quda into feature/openmp
maddyscientist Oct 20, 2023
1fe0e25
Disable unknown pragma warning for quda_test
maddyscientist Oct 20, 2023
ca880a8
Fix omp_threads string to print correctly (use omp_get_max_threads() …
maddyscientist Oct 20, 2023
8308347
Apply clang-format
maddyscientist Oct 20, 2023
e692811
Add ColorSpinorField::empty() method, matching functionality of Gauge…
maddyscientist Oct 24, 2023
f07b7b0
QUDA_CTEST_LAUNCH should use FORCE to ensure user can change after CM…
maddyscientist Oct 24, 2023
9768fcd
Improve robustness of dslash tests: source vector is now set once and…
maddyscientist Oct 24, 2023
ece7818
Add GaugeField::raw_pointer() which returns the raw pointer of the ga…
maddyscientist Oct 25, 2023
ba996e7
Fix dslash tests with split grid
maddyscientist Oct 25, 2023
80498fb
invert_test gtest should skip multi-shift solver if using split grid
maddyscientist Oct 25, 2023
edcf487
Add (staggered_)dslash_ctest split-grid tests to ctest. In test code…
maddyscientist Oct 25, 2023
3962371
Revert addition of FORCE to QUDA_CTEST_LAUNCH since this breaks CSCS CI
maddyscientist Oct 25, 2023
72df338
Add split-grid invert_test to ctest. For now, this test is only run …
maddyscientist Oct 26, 2023
3b7335c
Merge branch 'develop' of github.com:lattice/quda into feature/openmp
maddyscientist Oct 30, 2023
d0991e6
Merge branch 'develop' of github.com:lattice/quda into feature/openmp
maddyscientist Nov 7, 2023
452ee46
Fix OMP bug with long-link generation on host
maddyscientist Nov 7, 2023
868a7d3
Merge branch 'feature/openmp' of github.com:lattice/quda into feature…
maddyscientist Nov 8, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions include/color_spinor_field.h
Original file line number Diff line number Diff line change
Expand Up @@ -426,6 +426,12 @@ namespace quda
*/
ColorSpinorField &operator=(ColorSpinorField &&field);

/**
   @brief Query whether this field is empty, i.e., has not been initialized
   @return false if the field's init flag is set, otherwise true
*/
bool empty() const
{
  if (init) return false;
  return true;
}
maddyscientist marked this conversation as resolved.
Show resolved Hide resolved

/**
@brief Copy the source field contents into this
@param[in] src Source from which we are copying
Expand Down
11 changes: 11 additions & 0 deletions include/gauge_field.h
Original file line number Diff line number Diff line change
Expand Up @@ -476,6 +476,17 @@ namespace quda {
return reinterpret_cast<T>(gauge_array[d].data());
}

/**
   @brief Return an untyped pointer to the gauge field data.
   @return gauge.data() when the field order is not a pointer-array
   order; otherwise the address of a function-local static array
   whose first site_dim entries are set to gauge_array[i].data().
   @note The static array has 8 slots and is reused by every call
   that takes the pointer-array branch, so a later such call
   rewrites the entries seen through a previously returned pointer.
   NOTE(review): nothing here checks site_dim against the 8 slots -
   confirm site_dim never exceeds 8.
*/
void *raw_pointer() const
{
  // single contiguous allocation: hand back its pointer directly
  if (!is_pointer_array(order)) return gauge.data();

  // per-dimension allocations: gather the pointers into the static table
  static void *data_array[8];
  for (int dim = 0; dim < site_dim; dim++) { data_array[dim] = gauge_array[dim].data(); }
  return data_array;
}

/**
@brief Return array of pointers to the per dimension gauge field allocation(s).
@tparam T Optional type to cast the pointer to (default is
Expand Down
3 changes: 3 additions & 0 deletions include/quda_arch.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,5 +14,8 @@

#elif defined(QUDA_TARGET_SYCL)
#include <targets/sycl/quda_sycl.h>
#endif

#ifdef QUDA_OPENMP
#include <omp.h>
#endif
2 changes: 1 addition & 1 deletion include/util_quda.h
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ char *getPrintBuffer();
number of OMP threads for CPU functions recorded in the tune cache.
@return Returns the string
*/
char* getOmpThreadStr();
const char *getOmpThreadStr();

void errorQuda_(const char *func, const char *file, int line, ...);

Expand Down
1 change: 1 addition & 0 deletions lib/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -470,6 +470,7 @@ endif()

if(QUDA_OPENMP)
target_link_libraries(quda PUBLIC OpenMP::OpenMP_CXX)
target_compile_definitions(quda PUBLIC QUDA_OPENMP)
endif()

# set which precisions to enable
Expand Down
2 changes: 1 addition & 1 deletion lib/color_spinor_field.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ namespace quda
{
if (&src != this) {
// if field not already initialized then move the field
if (!init || are_compatible(*this, src)) {
if (!init || are_compatible(*this, src) || src.empty()) {
if (init) destroy();
LatticeField::operator=(std::move(src));
move(std::move(src));
Expand Down
7 changes: 3 additions & 4 deletions lib/interface_quda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3219,11 +3219,10 @@ void callMultiSrcQuda(void **_hp_x, void **_hp_b, QudaInvertParam *param, // col
// the split topology.
logQuda(QUDA_DEBUG_VERBOSE, "Split grid loading gauge field...\n");
if (!is_staggered) {
loadGaugeQuda(collected_gauge->data(), gauge_param);
loadGaugeQuda(collected_gauge->raw_pointer(), gauge_param);
} else {
// freeGaugeQuda();
loadFatLongGaugeQuda(param, gauge_param, collected_milc_fatlink_field->data(),
collected_milc_longlink_field->data());
loadFatLongGaugeQuda(param, gauge_param, collected_milc_fatlink_field->raw_pointer(),
collected_milc_longlink_field->raw_pointer());
}
logQuda(QUDA_DEBUG_VERBOSE, "Split grid loaded gauge field...\n");

Expand Down
14 changes: 7 additions & 7 deletions lib/util_quda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -134,17 +134,17 @@ void popOutputPrefix()

char *getPrintBuffer() { return buffer_; }

char* getOmpThreadStr() {
static char omp_thread_string[128];
const char *getOmpThreadStr()
{
static std::string omp_thread_string;
static bool init = false;
if (!init) {
strcpy(omp_thread_string,"omp_threads=");
char *omp_threads = getenv("OMP_NUM_THREADS");
strcat(omp_thread_string, omp_threads ? omp_threads : "1");
strcat(omp_thread_string, ",");
#ifdef QUDA_OPENMP
omp_thread_string = std::string("omp_threads=" + std::to_string(omp_get_max_threads()) + ",");
#endif
init = true;
}
return omp_thread_string;
return omp_thread_string.c_str();
}

void errorQuda_(const char *func, const char *file, int line, ...)
Expand Down
47 changes: 47 additions & 0 deletions tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,15 @@ target_compile_options(
$<IF:$<CONFIG:RELEASE>,-w,-Wall -Wextra>
$<$<CONFIG:STRICT>:-Werror>
)

# Ignore any unknown pragmas if not using OpenMP.
# Pass the variable name (not its expansion) so that if() evaluates
# QUDA_OPENMP itself rather than re-interpreting its value.
if(NOT QUDA_OPENMP)
  target_compile_options(quda_test PUBLIC $<$<COMPILE_LANGUAGE:CXX>:
    $<$<CXX_COMPILER_ID:Clang>:-Wno-unknown-pragmas>
    $<$<CXX_COMPILER_ID:GNU>:-Wno-unknown-pragmas>
    >)
endif()

if(BUILD_SHARED_LIBS)
install(TARGETS quda_test ${QUDA_EXCLUDE_FROM_INSTALL} DESTINATION ${CMAKE_INSTALL_LIBDIR})
endif()
Expand Down Expand Up @@ -265,6 +274,7 @@ if(QUDA_MPI OR QUDA_QMP)
if(DEFINED ENV{QUDA_TEST_GRID_SIZE})
get_test_ranks($ENV{QUDA_TEST_GRID_SIZE} QUDA_TEST_NUM_PROCS)
endif()
message(STATUS "ctest will run on ${QUDA_TEST_NUM_PROCS} processes")
set(QUDA_CTEST_LAUNCH ${MPIEXEC_EXECUTABLE};${MPIEXEC_NUMPROC_FLAG};${QUDA_TEST_NUM_PROCS};${MPIEXEC_PREFLAGS}
CACHE STRING "CTest Launcher command for QUDA's tests")
endif()
Expand Down Expand Up @@ -375,6 +385,18 @@ foreach(pol IN LISTS DSLASH_POLICIES)
set_tests_properties(dslash_${DIRAC_NAME}_policy${pol2} PROPERTIES ENVIRONMENT QUDA_ENABLE_DSLASH_POLICY=${pol})
endif()

# Split-grid variant of the dslash test for this policy.
add_test(NAME dslash_${DIRAC_NAME}_splitgrid_policy${pol2}
         COMMAND ${QUDA_CTEST_LAUNCH} $<TARGET_FILE:dslash_ctest> ${MPIEXEC_POSTFLAGS}
                 --dslash-type ${DIRAC_NAME}
                 --all-partitions 0
                 --test Dslash
                 --dim 2 4 6 8
                 --gtest_output=xml:dslash_${DIRAC_NAME}_splitgrid_test_pol${pol2}.xml)
# Append to ENVIRONMENT rather than using set_tests_properties() twice:
# each set_tests_properties(... ENVIRONMENT ...) call replaces the whole
# property, so the grid-partition setting would otherwise discard the
# dslash policy setting.
if(polenv)
  set_property(TEST dslash_${DIRAC_NAME}_splitgrid_policy${pol2}
               APPEND PROPERTY ENVIRONMENT "QUDA_ENABLE_DSLASH_POLICY=${pol}")
endif()
set_property(TEST dslash_${DIRAC_NAME}_splitgrid_policy${pol2}
             APPEND PROPERTY ENVIRONMENT "QUDA_TEST_GRID_PARTITION=$ENV{QUDA_TEST_GRID_SIZE}")

add_test(NAME benchmark_dslash_${DIRAC_NAME}_policy${pol2}
COMMAND ${QUDA_CTEST_LAUNCH} $<TARGET_FILE:dslash_ctest> ${MPIEXEC_POSTFLAGS}
--dslash-type ${DIRAC_NAME}
Expand Down Expand Up @@ -830,6 +852,17 @@ endif()
set_tests_properties(dslash_${DIRAC_NAME}_mat_policy${pol2} PROPERTIES ENVIRONMENT QUDA_ENABLE_DSLASH_POLICY=${pol2})
endif()

# Split-grid variant of the staggered dslash test for this policy.
# The gtest report is named after this test; it previously reused the
# "matpc_test" report name.
add_test(NAME dslash_${DIRAC_NAME}_splitgrid_policy${pol2}
         COMMAND ${QUDA_CTEST_LAUNCH} $<TARGET_FILE:staggered_dslash_ctest> ${MPIEXEC_POSTFLAGS}
                 --dslash-type ${DIRAC_NAME}
                 --test MatPC
                 --dim 2 4 6 8
                 --gtest_output=xml:dslash_${DIRAC_NAME}_splitgrid_test_pol${pol2}.xml)
# Append to ENVIRONMENT rather than using set_tests_properties() twice:
# each set_tests_properties(... ENVIRONMENT ...) call replaces the whole
# property, so the grid-partition setting would otherwise discard the
# dslash policy setting.
if(polenv)
  set_property(TEST dslash_${DIRAC_NAME}_splitgrid_policy${pol2}
               APPEND PROPERTY ENVIRONMENT "QUDA_ENABLE_DSLASH_POLICY=${pol2}")
endif()
set_property(TEST dslash_${DIRAC_NAME}_splitgrid_policy${pol2}
             APPEND PROPERTY ENVIRONMENT "QUDA_TEST_GRID_PARTITION=$ENV{QUDA_TEST_GRID_SIZE}")

add_test(NAME benchmark_dslash_${DIRAC_NAME}_policy${pol2}
COMMAND ${QUDA_CTEST_LAUNCH} $<TARGET_FILE:staggered_dslash_ctest> ${MPIEXEC_POSTFLAGS}
--dslash-type ${DIRAC_NAME}
Expand Down Expand Up @@ -931,6 +964,20 @@ foreach(prec IN LISTS TEST_PRECS)
--dim 2 4 6 8 --prec ${prec} --tol ${tol} --tolhq ${tol} --niter 1000
--enable-testing true
--gtest_output=xml:invert_test_wilson_${prec}.xml)

# Split-grid solver test: only registered when QUDA_ENABLE_TUNING is set
# to 0 in the environment at configure time. The value is quoted so that
# a defined-but-empty variable yields a false condition instead of a
# malformed if() expression.
if(DEFINED ENV{QUDA_ENABLE_TUNING} AND "$ENV{QUDA_ENABLE_TUNING}" EQUAL 0)
  add_test(NAME invert_test_splitgrid_wilson_${prec}
           COMMAND ${QUDA_CTEST_LAUNCH} $<TARGET_FILE:invert_test> ${MPIEXEC_POSTFLAGS}
                   --dslash-type wilson --ngcrkrylov 8
                   --dim 2 4 6 8 --prec ${prec} --tol ${tol} --tolhq ${tol} --niter 1000
                   --nsrc ${QUDA_TEST_NUM_PROCS}
                   --enable-testing true
                   --gtest_output=xml:invert_test_splitgrid_wilson_${prec}.xml)

  set_tests_properties(invert_test_splitgrid_wilson_${prec} PROPERTIES ENVIRONMENT QUDA_TEST_GRID_PARTITION=$ENV{QUDA_TEST_GRID_SIZE})
endif()
endif()

if(QUDA_DIRAC_TWISTED_MASS)
Expand Down
18 changes: 10 additions & 8 deletions tests/dslash_ctest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,6 @@ class DslashTest : public ::testing::TestWithParam<::testing::tuple<int, int, in
return true;
}

// work out if test_split_grid is enabled
bool test_split_grid = (grid_partition[0] * grid_partition[1] * grid_partition[2] * grid_partition[3] > 1);
if (::testing::get<2>(GetParam()) > 0 && test_split_grid) { return true; }

const std::array<bool, 16> partition_enabled {true, true, true, false, true, false, false, false,
true, false, false, false, true, false, true, true};
if (!ctest_all_partitions && !partition_enabled[::testing::get<2>(GetParam())]) return true;
Expand Down Expand Up @@ -68,8 +64,6 @@ class DslashTest : public ::testing::TestWithParam<::testing::tuple<int, int, in
}

public:
DslashTest() : dslash_test_wrapper(dtest_type) { }

virtual void SetUp()
{
int prec = ::testing::get<0>(GetParam());
Expand All @@ -94,12 +88,20 @@ class DslashTest : public ::testing::TestWithParam<::testing::tuple<int, int, in
commDimPartitionedReset();
}

static void SetUpTestCase() { initQuda(device_ordinal); }
static void SetUpTestCase()
{
initQuda(device_ordinal);
DslashTestWrapper::dtest_type = dtest_type;
}

// Per-test-case tear-down.
// Called after the last test in this test case.
// Can be omitted if not needed.
static void TearDownTestCase() { endQuda(); }
static void TearDownTestCase()
{
DslashTestWrapper::destroy();
endQuda();
}
};

TEST_P(DslashTest, verify)
Expand Down
14 changes: 10 additions & 4 deletions tests/dslash_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,6 @@ class DslashTest : public ::testing::Test
}

public:
DslashTest() : dslash_test_wrapper(dtest_type) { }

virtual void SetUp()
{
dslash_test_wrapper.init_test(argc_copy, argv_copy);
Expand All @@ -43,12 +41,20 @@ class DslashTest : public ::testing::Test

virtual void TearDown() { dslash_test_wrapper.end(); }

static void SetUpTestCase() { initQuda(device_ordinal); }
static void SetUpTestCase()
{
initQuda(device_ordinal);
DslashTestWrapper::dtest_type = dtest_type;
}

// Per-test-case tear-down.
// Called after the last test in this test case.
// Can be omitted if not needed.
static void TearDownTestCase() { endQuda(); }
static void TearDownTestCase()
{
DslashTestWrapper::destroy();
endQuda();
}
};

TEST_F(DslashTest, benchmark) { dslash_test_wrapper.run_test(niter, /**show_metrics =*/true); }
Expand Down
Loading
Loading