Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Workaround for assignment-after-reduction offload bug #4773

Merged
merged 1 commit into from
Oct 13, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions src/QMCWaveFunctions/Fermion/MultiDiracDeterminant.2.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -443,7 +443,7 @@ void MultiDiracDeterminant::mw_evaluateDetsForPtclMove(const RefVectorWithLeader

PRAGMA_OFFLOAD("omp target teams distribute map(always, from:curRatio_list_ptr[:nw]) \
is_device_ptr(psiV_list_devptr, psiMinv_temp_list_devptr)")
- for (size_t iw = 0; iw < nw; iw++)
+ for (uint32_t iw = 0; iw < nw; iw++)
{
ValueType c_ratio = 0.0;
PRAGMA_OFFLOAD("omp parallel for reduction(+ : c_ratio)")
Expand Down Expand Up @@ -780,9 +780,11 @@ void MultiDiracDeterminant::mw_evaluateDetsAndGradsForPtclMove(
throw std::runtime_error("In MultiDiracDeterminant ompBLAS::copy_batched_offset failed.");


+ // Index of loop over nw must be 32 bit sized to avoid assignment-after-reduction offload bug
+ // See https://github.com/QMCPACK/qmcpack/issues/4767
PRAGMA_OFFLOAD("omp target teams distribute is_device_ptr(psiV_list_devptr, psiMinv_temp_list_devptr) \
map(always, from:curRatio_list_ptr[:nw])")
- for (size_t iw = 0; iw < nw; iw++)
+ for (uint32_t iw = 0; iw < nw; iw++)
{
GradType ratioGradRef_local(0);
PRAGMA_OFFLOAD("omp parallel for reduction(+ : ratioGradRef_local)")
Expand Down Expand Up @@ -1048,7 +1050,7 @@ void MultiDiracDeterminant::mw_evaluateGrads(const RefVectorWithLeader<MultiDira

PRAGMA_OFFLOAD("omp target teams distribute is_device_ptr(psiMinv_list_devptr) \
map(always, from: ratioG_list_ptr[:nw])")
- for (size_t iw = 0; iw < nw; iw++)
+ for (uint32_t iw = 0; iw < nw; iw++)
{
ValueType ratioG_local(0);
PRAGMA_OFFLOAD("omp parallel for reduction(+ : ratioG_local)")
Expand Down
4 changes: 2 additions & 2 deletions src/QMCWaveFunctions/Fermion/MultiSlaterDetTableMethod.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -281,7 +281,7 @@ void MultiSlaterDetTableMethod::mw_evalGrad_impl(const RefVectorWithLeader<WaveF
map(from: grad_now_list_ptr[:3 * nw]) \
map(always, to: det_value_ptr_list_ptr[:nw]) \
map(to: mw_grads_ptr[:mw_grads.size()])")
- for (size_t iw = 0; iw < nw; iw++)
+ for (uint32_t iw = 0; iw < nw; iw++)
{
// enforce full precision reduction due to numerical sensitivity
PsiValueType psi_local(0);
Expand Down Expand Up @@ -600,7 +600,7 @@ void MultiSlaterDetTableMethod::mw_calcRatio(const RefVectorWithLeader<WaveFunct
ScopedTimer local_timer(det_leader.offload_timer);
PRAGMA_OFFLOAD("omp target teams distribute map(always,from: psi_list_ptr[:nw]) \
map(always, to: det_value_ptr_list_ptr[:nw])")
- for (size_t iw = 0; iw < nw; iw++)
+ for (uint32_t iw = 0; iw < nw; iw++)
{
PsiValueType psi_local(0);
PRAGMA_OFFLOAD("omp parallel for reduction(+ : psi_local)")
Expand Down