# Patch overview (reviewer annotations; text ahead of the first "diff --git"
# header is commentary, not part of any hunk).
#
# Purpose: make several OpenMP constructs conditional on omp_in_parallel().
# Files touched: src/acc/dbcsr_acc_device.F, src/data/dbcsr_ptr_util.F,
#                src/mpi/dbcsr_mpiwrap.F, src/ops/dbcsr_operations.F.
#
# Per hunk:
#  - dbcsr_acc_device.F: imports omp_in_parallel. dbcsr_acc_set_active_device
#    now opens its PARALLEL region only when omp_in_parallel() is false,
#    combines the per-thread istat with REDUCTION(MAX:istat), and otherwise
#    calls acc_set_active_device_cu directly. The istat /= 0 check and
#    DBCSR_ABORT move after the region instead of running inside it.
#  - dbcsr_ptr_util.F: the OMP_LIB import list is replaced by omp_in_parallel
#    only. mem_copy_* and mem_zero_* use PARALLEL WORKSHARE when
#    omp_in_parallel() is false and an orphaned WORKSHARE otherwise, all
#    inside the existing __DBCSR_DISABLE_WORKSHARE preprocessor guard.
#    The removed/added "src" declaration lines look identical here, so that
#    change is presumably whitespace-only.
#  - dbcsr_mpiwrap.F: imports omp_in_parallel; the local-copy branch of
#    mp_rget_*v gets the same PARALLEL WORKSHARE / WORKSHARE split.
#  - dbcsr_operations.F: the import drops omp_get_max_threads and adds
#    omp_in_parallel. dbcsr_zero is restructured into two SELECT CASE
#    constructs (PARALLEL WORKSHARE vs. orphaned WORKSHARE) plus a plain
#    SELECT CASE under #else for __DBCSR_DISABLE_WORKSHARE.
#
# NOTE(review): "!$ IF", "!$ ELSE" and "!$ END IF" are OpenMP conditional
#   compilation lines. In a build without OpenMP they disappear, so BOTH
#   copies of each guarded statement are compiled: acc_set_active_device_cu
#   is called twice, and (when __DBCSR_DISABLE_WORKSHARE is not defined)
#   dst(:)/base(:) are assigned twice and dbcsr_zero runs both SELECT CASE
#   constructs. Results are unchanged but the work is doubled - confirm
#   whether non-OpenMP builds are supported and guard accordingly.
# NOTE(review): REDUCTION(MAX:istat) starts from istat = 0, so a negative
#   status from any thread would be masked. Presumably the return codes of
#   acc_set_active_device_cu are non-negative - TODO confirm.
# NOTE(review): the orphaned "!$OMP WORKSHARE" branches assume every thread
#   of the enclosing team reaches the call with the same shared arrays. If a
#   caller invokes these routines from only some threads, or with per-thread
#   buffers, the assignment would be split across the team - verify against
#   the call sites, which are not visible in this patch.
# NOTE(review): two "USE OMP_LIB, ONLY:" lists lose names
#   (omp_get_max_threads everywhere; omp_get_thread_num and
#   omp_get_num_threads in dbcsr_ptr_util.F). Confirm that no other routine
#   in those modules still references them.
# NOTE(review): the hunks below are stored with their line breaks collapsed
#   (many diff lines per physical line). Presumably they must be restored to
#   one diff line per physical line before patch/git apply can read them -
#   confirm against the original patch.
#
diff --git a/src/acc/dbcsr_acc_device.F b/src/acc/dbcsr_acc_device.F index 7b4d29f25c6..93bae300550 100644 --- a/src/acc/dbcsr_acc_device.F +++ b/src/acc/dbcsr_acc_device.F @@ -13,6 +13,8 @@ MODULE dbcsr_acc_device #endif #include "base/dbcsr_base_uses.f90" +!$ USE OMP_LIB, ONLY: omp_in_parallel + IMPLICIT NONE PUBLIC :: dbcsr_acc_get_ndevices, dbcsr_acc_set_active_device, dbcsr_acc_clear_errors @@ -83,11 +85,16 @@ SUBROUTINE dbcsr_acc_set_active_device(device_id) #if defined (__DBCSR_ACC) INTEGER :: istat -!$OMP PARALLEL DEFAULT(NONE) PRIVATE(istat) SHARED(device_id) - istat = acc_set_active_device_cu(device_id) +!$ IF (.NOT. omp_in_parallel()) THEN + istat = 0 +!$OMP PARALLEL DEFAULT(NONE) SHARED(device_id) REDUCTION(MAX:istat) + istat = acc_set_active_device_cu(device_id) +!$OMP END PARALLEL +!$ ELSE + istat = acc_set_active_device_cu(device_id) +!$ END IF IF (istat /= 0) & DBCSR_ABORT("dbcsr_acc_set_active_device: failed") -!$OMP END PARALLEL #else MARK_USED(device_id) diff --git a/src/data/dbcsr_ptr_util.F b/src/data/dbcsr_ptr_util.F index eb9d25da071..aa5c94ed1ae 100644 --- a/src/data/dbcsr_ptr_util.F +++ b/src/data/dbcsr_ptr_util.F @@ -28,7 +28,7 @@ MODULE dbcsr_ptr_util mp_deallocate #include "base/dbcsr_base_uses.f90" -!$ USE OMP_LIB, ONLY: omp_get_max_threads, omp_get_thread_num, omp_get_num_threads +!$ USE OMP_LIB, ONLY: omp_in_parallel IMPLICIT NONE @@ -294,14 +294,20 @@ SUBROUTINE mem_copy_${nametype1}$ (dst, src, n) !! length of copy ${type1}$, DIMENSION(1:n), INTENT(OUT) :: dst !! destination memory - ${type1}$, DIMENSION(1:n), INTENT(IN) :: src + ${type1}$, DIMENSION(1:n), INTENT(IN) :: src !! source memory #if !defined(__DBCSR_DISABLE_WORKSHARE) -!$OMP PARALLEL WORKSHARE DEFAULT(none) SHARED(dst,src) +!$ IF (.NOT. 
omp_in_parallel()) THEN +!$OMP PARALLEL WORKSHARE DEFAULT(none) SHARED(dst,src) + dst(:) = src(:) +!$OMP END PARALLEL WORKSHARE +!$ ELSE +!$OMP WORKSHARE #endif - dst(:) = src(:) + dst(:) = src(:) #if !defined(__DBCSR_DISABLE_WORKSHARE) -!$OMP END PARALLEL WORKSHARE +!$OMP END WORKSHARE +!$ END IF #endif END SUBROUTINE mem_copy_${nametype1}$ @@ -313,11 +319,17 @@ SUBROUTINE mem_zero_${nametype1}$ (dst, n) ${type1}$, DIMENSION(1:n), INTENT(OUT) :: dst !! destination memory #if !defined(__DBCSR_DISABLE_WORKSHARE) -!$OMP PARALLEL WORKSHARE DEFAULT(none) SHARED(dst) +!$ IF (.NOT. omp_in_parallel()) THEN +!$OMP PARALLEL WORKSHARE DEFAULT(none) SHARED(dst) + dst(:) = ${zero1}$ +!$OMP END PARALLEL WORKSHARE +!$ ELSE +!$OMP WORKSHARE #endif - dst(:) = ${zero1}$ + dst(:) = ${zero1}$ #if !defined(__DBCSR_DISABLE_WORKSHARE) -!$OMP END PARALLEL WORKSHARE +!$OMP END WORKSHARE +!$ END IF #endif END SUBROUTINE mem_zero_${nametype1}$ diff --git a/src/mpi/dbcsr_mpiwrap.F b/src/mpi/dbcsr_mpiwrap.F index f5393630eb9..14d4745746a 100644 --- a/src/mpi/dbcsr_mpiwrap.F +++ b/src/mpi/dbcsr_mpiwrap.F @@ -95,6 +95,8 @@ MODULE dbcsr_mpiwrap #define MPI_STATUS_EXTRACT(X) (X) #endif +!$ USE OMP_LIB, ONLY: omp_in_parallel + IMPLICIT NONE PRIVATE @@ -5183,11 +5185,17 @@ SUBROUTINE mp_rget_${nametype1}$v(base, source, win, win_data, myproc, disp, req #endif IF (do_local_copy) THEN #if !defined(__DBCSR_DISABLE_WORKSHARE) -!$OMP PARALLEL WORKSHARE DEFAULT(none) SHARED(base,win_data,disp_aint,len) -#endif - base(:) = win_data(disp_aint + 1:disp_aint + len) +!$ IF (.NOT. 
omp_in_parallel()) THEN +!$OMP PARALLEL WORKSHARE DEFAULT(none) SHARED(base,win_data,disp_aint,len) + base(:) = win_data(disp_aint + 1:disp_aint + len) +!$OMP END PARALLEL WORKSHARE +!$ ELSE +!$OMP WORKSHARE +#endif + base(:) = win_data(disp_aint + 1:disp_aint + len) #if !defined(__DBCSR_DISABLE_WORKSHARE) -!$OMP END PARALLEL WORKSHARE +!$OMP END WORKSHARE +!$ END IF #endif request = mp_request_null ierr = 0 diff --git a/src/ops/dbcsr_operations.F b/src/ops/dbcsr_operations.F index e0a59a92e48..80fafd09163 100644 --- a/src/ops/dbcsr_operations.F +++ b/src/ops/dbcsr_operations.F @@ -94,7 +94,7 @@ MODULE dbcsr_operations mp_sum #include "base/dbcsr_base_uses.f90" -!$ USE OMP_LIB, ONLY: omp_get_max_threads, omp_get_thread_num, omp_get_num_threads +!$ USE OMP_LIB, ONLY: omp_get_thread_num, omp_get_num_threads, omp_in_parallel IMPLICIT NONE @@ -315,35 +315,58 @@ SUBROUTINE dbcsr_zero(matrix_a) INTEGER :: handle CALL timeset(routineN, handle) - SELECT CASE (dbcsr_get_data_type(matrix_a)) -#if defined(__DBCSR_DISABLE_WORKSHARE) - CASE (dbcsr_type_complex_4) - matrix_a%data_area%d%c_sp = (0.0, 0.0) - CASE (dbcsr_type_complex_8) - matrix_a%data_area%d%c_dp = (0.0_dp, 0.0_dp) - CASE (dbcsr_type_real_4) - matrix_a%data_area%d%r_sp = 0.0 - CASE (dbcsr_type_real_8) - matrix_a%data_area%d%r_dp = 0.0_dp +#if !defined(__DBCSR_DISABLE_WORKSHARE) +!$ IF (.NOT. 
omp_in_parallel()) THEN + SELECT CASE (dbcsr_get_data_type(matrix_a)) + CASE (dbcsr_type_complex_4) +!$OMP PARALLEL WORKSHARE DEFAULT(NONE) SHARED(matrix_a) + matrix_a%data_area%d%c_sp = (0.0, 0.0) +!$OMP END PARALLEL WORKSHARE + CASE (dbcsr_type_complex_8) +!$OMP PARALLEL WORKSHARE DEFAULT(NONE) SHARED(matrix_a) + matrix_a%data_area%d%c_dp = (0.0_dp, 0.0_dp) +!$OMP END PARALLEL WORKSHARE + CASE (dbcsr_type_real_4) +!$OMP PARALLEL WORKSHARE DEFAULT(NONE) SHARED(matrix_a) + matrix_a%data_area%d%r_sp = 0.0 +!$OMP END PARALLEL WORKSHARE + CASE (dbcsr_type_real_8) +!$OMP PARALLEL WORKSHARE DEFAULT(NONE) SHARED(matrix_a) + matrix_a%data_area%d%r_dp = 0.0_dp +!$OMP END PARALLEL WORKSHARE + END SELECT +!$ ELSE + SELECT CASE (dbcsr_get_data_type(matrix_a)) + CASE (dbcsr_type_complex_4) +!$OMP WORKSHARE + matrix_a%data_area%d%c_sp = (0.0, 0.0) +!$OMP END WORKSHARE + CASE (dbcsr_type_complex_8) +!$OMP WORKSHARE + matrix_a%data_area%d%c_dp = (0.0_dp, 0.0_dp) +!$OMP END WORKSHARE + CASE (dbcsr_type_real_4) +!$OMP WORKSHARE + matrix_a%data_area%d%r_sp = 0.0 +!$OMP END WORKSHARE + CASE (dbcsr_type_real_8) +!$OMP WORKSHARE + matrix_a%data_area%d%r_dp = 0.0_dp +!$OMP END WORKSHARE + END SELECT +!$ END IF #else + SELECT CASE (dbcsr_get_data_type(matrix_a)) CASE (dbcsr_type_complex_4) -!$OMP PARALLEL WORKSHARE DEFAULT(NONE), SHARED(matrix_a) matrix_a%data_area%d%c_sp = (0.0, 0.0) -!$OMP END PARALLEL WORKSHARE CASE (dbcsr_type_complex_8) -!$OMP PARALLEL WORKSHARE DEFAULT(NONE), SHARED(matrix_a) matrix_a%data_area%d%c_dp = (0.0_dp, 0.0_dp) -!$OMP END PARALLEL WORKSHARE CASE (dbcsr_type_real_4) -!$OMP PARALLEL WORKSHARE DEFAULT(NONE), SHARED(matrix_a) matrix_a%data_area%d%r_sp = 0.0 -!$OMP END PARALLEL WORKSHARE CASE (dbcsr_type_real_8) -!$OMP PARALLEL WORKSHARE DEFAULT(NONE), SHARED(matrix_a) matrix_a%data_area%d%r_dp = 0.0_dp -!$OMP END PARALLEL WORKSHARE -#endif END SELECT +#endif CALL timestop(handle) END SUBROUTINE dbcsr_zero