Skip to content

Commit

Permalink
Merge pull request QMCPACK#4813 from ye-luo/stream-sync
Browse files Browse the repository at this point in the history
Add stream sync to detect errors on the device.
  • Loading branch information
prckent authored Nov 3, 2023
2 parents e9cd2c8 + 3968de5 commit 137615d
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 12 deletions.
3 changes: 1 addition & 2 deletions src/QMCWaveFunctions/Fermion/DelayedUpdateCUDA.h
Original file line number Diff line number Diff line change
Expand Up @@ -253,8 +253,7 @@ class DelayedUpdateCUDA
cudaErrorCheck(cudaMemcpyAsync(Ainv.data(), Ainv_gpu.data(), Ainv.size() * sizeof(T), cudaMemcpyDeviceToHost,
hstream),
"cudaMemcpyAsync failed!");
// no need to wait because : For transfers from device memory to pageable host memory, the function will return only once the copy has completed.
//cudaErrorCheck(cudaStreamSynchronize(hstream), "cudaStreamSynchronize failed!");
cudaErrorCheck(cudaStreamSynchronize(hstream), "cudaStreamSynchronize failed!");
}
}
};
Expand Down
15 changes: 10 additions & 5 deletions src/QMCWaveFunctions/Fermion/cuSolverInverter.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ class cuSolverInverter
cudaMemcpyDeviceToHost, hstream_),
"cudaMemcpyAsync failed!");
// check LU success
cudaErrorCheck(cudaStreamSynchronize(hstream_), "cudaStreamSynchronize failed!");
cudaErrorCheck(cudaStreamSynchronize(hstream_), "cudaStreamSynchronize after getrf failed!");
if (ipiv[0] != 0)
{
std::ostringstream err;
Expand All @@ -124,7 +124,8 @@ class cuSolverInverter
cudaErrorCheck(cudaMemcpyAsync(Ainv.data(), Ainv_gpu.data(), Ainv.size() * sizeof(TMAT), cudaMemcpyDeviceToHost,
hstream_),
"cudaMemcpyAsync failed!");
// no need to wait because : For transfers from device memory to pageable host memory, the function will return only once the copy has completed.
// check solve success
cudaErrorCheck(cudaStreamSynchronize(hstream_), "cudaStreamSynchronize after getrs failed!");
if (ipiv[0] != 0)
{
std::ostringstream err;
Expand Down Expand Up @@ -162,7 +163,7 @@ class cuSolverInverter
cudaMemcpyDeviceToHost, hstream_),
"cudaMemcpyAsync failed!");
// check LU success
cudaErrorCheck(cudaStreamSynchronize(hstream_), "cudaStreamSynchronize failed!");
cudaErrorCheck(cudaStreamSynchronize(hstream_), "cudaStreamSynchronize after getrf failed!");
if (ipiv[0] != 0)
{
std::ostringstream err;
Expand All @@ -180,17 +181,21 @@ class cuSolverInverter
cudaErrorCheck(cudaMemcpyAsync(Ainv.data(), Ainv_gpu.data(), Ainv.size() * sizeof(TMAT), cudaMemcpyDeviceToHost,
hstream_),
"cudaMemcpyAsync failed!");
// no need to wait because : For transfers from device memory to pageable host memory, the function will return only once the copy has completed.
// check solve success
cudaErrorCheck(cudaStreamSynchronize(hstream_), "cudaStreamSynchronize after getrs failed!");
if (ipiv[0] != 0)
{
std::ostringstream err;
err << "cusolver::getrs calculation failed with devInfo = " << ipiv[0] << std::endl;
throw std::runtime_error(err.str());
}

std::ostringstream nan_msg;
for(int i = 0; i < norb; i++)
if (qmcplusplus::isnan(std::norm(Ainv[i][i])))
throw std::runtime_error("Ainv[i][i] is NaN. i = " + std::to_string(i));
nan_msg << " Ainv["<< i << "][" << i << "] has bad value " << Ainv[i][i] << std::endl;
if (const std::string str = nan_msg.str(); !str.empty())
throw std::runtime_error("Inverse matrix diagonal check found:\n" + str);
}
};
} // namespace qmcplusplus
Expand Down
14 changes: 9 additions & 5 deletions src/QMCWaveFunctions/Fermion/rocSolverInverter.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ class rocSolverInverter
hipMemcpyDeviceToHost, hstream_),
"hipMemcpyAsync for LU_diag failed!");
// check LU success
cudaErrorCheck(hipStreamSynchronize(hstream_), "hipStreamSynchronize failed!");
cudaErrorCheck(hipStreamSynchronize(hstream_), "hipStreamSynchronize after getrf failed!");
if (ipiv[0] != 0)
{
std::ostringstream err;
Expand All @@ -137,7 +137,7 @@ class rocSolverInverter
cudaErrorCheck(hipMemcpyAsync(Ainv.data(), Ainv_gpu.data(), Ainv.size() * sizeof(TMAT), hipMemcpyDeviceToHost,
hstream_),
"hipMemcpyAsync for Ainv failed!");
// no need to wait because : For transfers from device memory to pageable host memory, the function will return only once the copy has completed.
cudaErrorCheck(hipStreamSynchronize(hstream_), "hipStreamSynchronize after getrs failed!");
if (ipiv[0] != 0)
{
std::ostringstream err;
Expand Down Expand Up @@ -175,7 +175,7 @@ class rocSolverInverter
hipMemcpyDeviceToHost, hstream_),
"hipMemcpyAsync failed!");
// check LU success
cudaErrorCheck(hipStreamSynchronize(hstream_), "hipStreamSynchronize failed!");
cudaErrorCheck(hipStreamSynchronize(hstream_), "hipStreamSynchronize after getrf failed!");
if (ipiv[0] != 0)
{
std::ostringstream err;
Expand All @@ -194,7 +194,8 @@ class rocSolverInverter
cudaErrorCheck(hipMemcpyAsync(Ainv.data(), Ainv_gpu.data(), Ainv.size() * sizeof(TMAT), hipMemcpyDeviceToHost,
hstream_),
"hipMemcpyAsync failed!");
// no need to wait because : For transfers from device memory to pageable host memory, the function will return only once the copy has completed.
// check solve success
cudaErrorCheck(hipStreamSynchronize(hstream_), "hipStreamSynchronize after getrs failed!");
if (ipiv[0] != 0)
{
std::ostringstream err;
Expand All @@ -203,9 +204,12 @@ class rocSolverInverter
throw std::runtime_error(err.str());
}

std::ostringstream nan_msg;
for(int i = 0; i < norb; i++)
if (qmcplusplus::isnan(std::norm(Ainv[i][i])))
throw std::runtime_error("Ainv[i][i] is NaN. i = " + std::to_string(i));
nan_msg << " Ainv["<< i << "][" << i << "] has bad value " << Ainv[i][i] << std::endl;
if (const std::string str = nan_msg.str(); !str.empty())
throw std::runtime_error("Inverse matrix diagonal check found:\n" + str);
}
};
} // namespace qmcplusplus
Expand Down

0 comments on commit 137615d

Please sign in to comment.