Merge branch 'develop' into float-fftw-info
denghuilu authored Dec 2, 2023
2 parents d83e59f + 37cc1a3 commit 98ff034
Showing 17 changed files with 841 additions and 670 deletions.
5 changes: 4 additions & 1 deletion CMakeLists.txt
@@ -286,7 +286,10 @@ endif()
 # Warning: CMake add support to HIP in version 3.21. This is rather a new version.
 # Use cmake with AMD-ROCm: https://rocmdocs.amd.com/en/latest/Installation_Guide/Using-CMake-with-AMD-ROCm.html
 if(USE_ROCM)
-if (NOT DEFINED ROCM_PATH )
+if(COMMIT_INFO)
+message(FATAL_ERROR "Commit info is not supported on ROCm.")
+endif()
+if(NOT DEFINED ROCM_PATH )
 set (ROCM_PATH "/opt/rocm" CACHE STRING "Default ROCM installation directory." )
 endif ()
 if(NOT DEFINED HIP_PATH)
19 changes: 11 additions & 8 deletions examples/scf/pw_Si2/INPUT
@@ -1,9 +1,12 @@
 INPUT_PARAMETERS
-#Parameters (General)
-pseudo_dir ../../../tests/PP_ORB
-symmetry 1
-#Parameters (Accuracy)
-basis_type pw
-ecutwfc 60
-scf_thr 1e-8
-scf_nmax 100
+#Parameters (General)
+pseudo_dir ../../../tests/PP_ORB
+symmetry 1
+#Parameters (Accuracy)
+basis_type pw
+ecutwfc 60
+scf_thr 1e-7
+scf_nmax 100
+device cpu
+ks_solver cg
+precision double
10 changes: 5 additions & 5 deletions source/module_base/module_container/ATen/kernels/cuda/linalg.cu
@@ -31,7 +31,7 @@ __global__ void do_add_kernel(
 T* z)
 {
 // Perform add operation for the specified range [begin, end) in the output Tensor.
-for (auto o_idx = threadIdx.x; o_idx < num_element; o_idx += blockDim.x) {
+for (int o_idx = threadIdx.x; o_idx < num_element; o_idx += blockDim.x) {
 // Assign the sum of the input Tensor elements at index 'o_idx' to the output Tensor element at index 'o_idx'.
 z[o_idx] = alpha * x[o_idx] + beta * y[o_idx];
 }
@@ -44,7 +44,7 @@ __global__ void do_mul_kernel(
 const T* x,
 T* y)
 {
-for (auto o_idx = threadIdx.x; o_idx < num_element; o_idx += blockDim.x) {
+for (int o_idx = threadIdx.x; o_idx < num_element; o_idx += blockDim.x) {
 // Assign the sum of the input Tensor elements at index 'o_idx' to the output Tensor element at index 'o_idx'.
 y[o_idx] = alpha * x[o_idx];
 }
@@ -58,7 +58,7 @@ __global__ void do_mul_kernel(
 const T* y,
 T* z)
 {
-for (auto o_idx = threadIdx.x; o_idx < num_element; o_idx += blockDim.x) {
+for (int o_idx = threadIdx.x; o_idx < num_element; o_idx += blockDim.x) {
 // Assign the sum of the input Tensor elements at index 'o_idx' to the output Tensor element at index 'o_idx'.
 z[o_idx] = alpha * x[o_idx] * y[o_idx];
 }
@@ -72,7 +72,7 @@ __global__ void do_div_kernel(
 const T* y,
 T* z)
 {
-for (auto o_idx = threadIdx.x; o_idx < num_element; o_idx += blockDim.x) {
+for (int o_idx = threadIdx.x; o_idx < num_element; o_idx += blockDim.x) {
 // Assign the sum of the input Tensor elements at index 'o_idx' to the output Tensor element at index 'o_idx'.
 z[o_idx] = alpha * x[o_idx] / y[o_idx];
 }
@@ -88,7 +88,7 @@ __global__ void do_fma_kernel(
 const T* z,
 T* out)
 {
-for (auto o_idx = threadIdx.x; o_idx < num_element; o_idx += blockDim.x) {
+for (int o_idx = threadIdx.x; o_idx < num_element; o_idx += blockDim.x) {
 // Assign the sum of the input Tensor elements at index 'o_idx' to the output Tensor element at index 'o_idx'.
 out[o_idx] = alpha * x[o_idx] * y[o_idx] + beta * z[o_idx];
 }
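
The only functional change across these five kernels is the loop-index declaration. `threadIdx.x` has type `unsigned int`, so `auto o_idx = threadIdx.x` deduces an unsigned index, and comparing it against a signed `num_element` mixes signedness; switching to `int` keeps the index arithmetic and the loop condition signed, which is presumably the motivation here. Below is a minimal, self-contained sketch of the same block-stride loop pattern; the kernel name `axpy_block_stride`, the launch configuration, and the `main` driver are illustrative assumptions, not part of the ABACUS sources.

```cuda
#include <cstdio>
#include <cuda_runtime.h>

// Illustrative block-stride kernel mirroring the pattern used in linalg.cu:
// each thread starts at threadIdx.x and strides by blockDim.x over the flat array.
// The index is declared as int (not auto) so the comparison with num_element stays signed.
__global__ void axpy_block_stride(const int num_element,
                                  const float alpha,
                                  const float* x,
                                  const float beta,
                                  const float* y,
                                  float* z)
{
    for (int o_idx = threadIdx.x; o_idx < num_element; o_idx += blockDim.x) {
        z[o_idx] = alpha * x[o_idx] + beta * y[o_idx];
    }
}

int main()
{
    const int n = 1024;
    float *x, *y, *z;
    cudaMallocManaged(&x, n * sizeof(float));
    cudaMallocManaged(&y, n * sizeof(float));
    cudaMallocManaged(&z, n * sizeof(float));
    for (int i = 0; i < n; ++i) { x[i] = 1.0f; y[i] = 2.0f; }

    // One block of 256 threads: each thread handles every 256th element via the stride loop,
    // matching how do_add_kernel and the other kernels above walk the flattened tensor.
    axpy_block_stride<<<1, 256>>>(n, 2.0f, x, 3.0f, y, z);
    cudaDeviceSynchronize();

    printf("z[0] = %f (expected 8.0)\n", z[0]);
    cudaFree(x); cudaFree(y); cudaFree(z);
    return 0;
}
```
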
Changes in the hipBLAS kernels source (file path not shown in this view):
@@ -1,11 +1,11 @@
-#include <ATen/kernels/blas_op.h>
+#include <ATen/kernels/blas.h>
 #include <base/third_party/blas.h>

 #include <hip/hip_runtime.h>
 #include <hipblas/hipblas.h>

 namespace container {
-namespace op {
+namespace kernels {

 static hipblasHandle_t hipblas_handle = nullptr;

@@ -241,5 +241,5 @@ template struct blas_gemm_batched_strided<double, DEVICE_GPU>;
 template struct blas_gemm_batched_strided<std::complex<float >, DEVICE_GPU>;
 template struct blas_gemm_batched_strided<std::complex<double>, DEVICE_GPU>;

-} // namespace op
+} // namespace kernels
 } // namespace container
Changes in the hipSOLVER (LAPACK) kernels source (file path not shown in this view):
@@ -1,13 +1,13 @@
 #include <vector>
-#include <ATen/kernels/lapack_op.h>
+#include <ATen/kernels/lapack.h>
 #include <base/third_party/lapack.h>

 #include <hip/hip_runtime.h>
 #include <thrust/complex.h>
 #include <hipsolver/hipsolver.h>

 namespace container {
-namespace op {
+namespace kernels {


 static hipsolverHandle_t hipsolver_handle = nullptr;

@@ -155,5 +155,5 @@ template struct lapack_dngvd<double, DEVICE_GPU>;
 template struct lapack_dngvd<std::complex<float>, DEVICE_GPU>;
 template struct lapack_dngvd<std::complex<double>, DEVICE_GPU>;

-} // namespace op
+} // namespace kernels
 } // namespace container
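
Both HIP files carry the same refactor: the per-kernel headers drop the `_op` suffix (`blas_op.h` → `blas.h`, `lapack_op.h` → `lapack.h`) and the inner namespace is renamed from `container::op` to `container::kernels`. The sketch below only illustrates what that rename means for calling code; `my_functor` is a hypothetical placeholder that mimics the functor-struct style visible in the diff (e.g. `blas_gemm_batched_strided<T, DEVICE_GPU>`), not an actual ABACUS type.

```cuda
#include <cstdio>

namespace container {
namespace kernels {   // previously: namespace op

// Hypothetical functor standing in for a real BLAS/LAPACK wrapper struct.
template <typename T>
struct my_functor {
    T operator()(const T alpha, const T x) const {
        return alpha * x;
    }
};

} // namespace kernels (previously: } // namespace op)
} // namespace container

int main() {
    // Call sites change only in the namespace they spell out:
    //   container::op::my_functor<double>()  ->  container::kernels::my_functor<double>()
    container::kernels::my_functor<double> f;
    printf("%f\n", f(2.0, 3.0));
    return 0;
}
```
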