Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Initial GPU port of xtb #342

Merged
merged 3 commits into from
Sep 21, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,10 @@ subdir('src')
srcs += 'symmetry/symmetry.f90'
srcs += 'symmetry/symmetry_i.c'

if get_option('nvtx')
srcs += 'src/nvtx.f90'
endif

xtb_inc = meson.current_source_dir() / 'include'
incdir = include_directories('include')

Expand Down
20 changes: 19 additions & 1 deletion meson/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,23 @@ elif fc.get_id() == 'intel'
add_project_arguments('-traceback', language: 'fortran')
elif fc.get_id() == 'pgi'
add_project_arguments(
'-Mbackslash', '-Mallocatable=03', '-traceback', '-r8',
'-Mpreprocess', '-Mbackslash', '-Mallocatable=03', '-traceback', '-r8',
language: 'fortran'
)

if get_option('gpu')
add_project_arguments('-acc', '-Minfo=accel', '-DXTB_GPU', language: 'fortran')
add_project_link_arguments('-acc', '-Minfo=accel', language: 'fortran')

gpu_arch = get_option('gpu_arch')
add_project_arguments('-ta=tesla:cc@0@'.format(gpu_arch), language: 'fortran')
add_project_link_arguments('-ta=tesla:cc@0@'.format(gpu_arch), language: 'fortran')

if get_option('cusolver')
add_project_arguments('-Mcudalib=cusolver', '-DUSE_CUSOLVER', language: 'fortran')
add_project_link_arguments('-Mcudalib=cusolver', '-DUSE_CUSOLVER', language: 'fortran')
endif
endif
endif

# fix compilation problems with symmetry/symmetry_i.c
Expand Down Expand Up @@ -135,6 +149,10 @@ endif

dependencies += dependency('threads')

if get_option('nvtx')
dependencies += fc.find_library('nvToolsExt', required: true)
endif

# distribute dependencies for shared object and static executable
lib_deps += dependencies
exe_deps += dependencies
10 changes: 10 additions & 0 deletions meson_options.txt
Original file line number Diff line number Diff line change
Expand Up @@ -28,3 +28,13 @@ option('build_name', type: 'string', value: 'unknown',
description: 'Name of the build, will be overwritten automatically by git')
option('test_timeout', type: 'integer', min: 1, value: 30,
description: 'test timeout in seconds')

# GPU specific options
option('gpu', type: 'boolean', value: false,
description: 'use GPU acceleration')
option('gpu_arch', type: 'string', value: '70',
description: 'GPU architecture version string')
option('cusolver', type: 'boolean', value: false,
description: 'Use cuSOLVER for eigensolver routines')
option('nvtx', type: 'boolean', value: false,
description: 'use NVTX markers')
2 changes: 1 addition & 1 deletion src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ list(APPEND srcs
"${dir}/scan_driver.f90"
"${dir}/scanparam.f90"
"${dir}/scc_core.f90"
"${dir}/scf_module.f90"
"${dir}/scf_module.F90"
"${dir}/screening.f90"
"${dir}/set_module.f90"
"${dir}/setparam.f90"
Expand Down
2 changes: 1 addition & 1 deletion src/disp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ list(APPEND srcs
"${dir}/dftd3.f"
"${dir}/dftd3.f90"
"${dir}/dftd3_parameters.f90"
"${dir}/dftd4.f90"
"${dir}/dftd4.F90"
"${dir}/dftd4_parameters.f90"
"${dir}/encharges.f90"
"${dir}/ncoord.f90"
Expand Down
143 changes: 142 additions & 1 deletion src/disp/dftd4.f90 → src/disp/dftd4.F90
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
! This file is part of xtb.
!
! Copyright (C) 2017-2020 Stefan Grimme
! Copyright (C) 2020, NVIDIA CORPORATION. All rights reserved.
!
! xtb is free software: you can redistribute it and/or modify it under
! the terms of the GNU Lesser General Public License as published by
Expand Down Expand Up @@ -1609,8 +1610,13 @@ subroutine d4_full_gradient_latp &
if (par%s9 /= 0.0_wp) then
call get_atomic_c6(dispm, nat, mol%at, zerovec, zerodcn, zerodq, &
& c6, dc6dcn, dc6dq)
#ifdef XTB_GPU
call atm_gradient_latp_gpu(mol, trans, cutoff3, par, sqrtZr4r2, c6, dc6dcn, &
& energies3, gradient, sigma, dEdcn)
#else
call atm_gradient_latp(mol, trans, cutoff3, par, sqrtZr4r2, c6, dc6dcn, &
& energies3, gradient, sigma, dEdcn)
#endif
end if
if (present(e3)) e3 = sum(energies3)

Expand Down Expand Up @@ -1892,9 +1898,15 @@ subroutine d4_atm_gradient_latp &
call get_atomic_c6(dispm, nat, mol%at, zerovec, zerodcn, zerodq, &
& c6, dc6dcn, dc6dq)

#ifdef XTB_GPU
call atm_gradient_latp_gpu &
& (mol, trans, cutoff, par, sqrtZr4r2, c6, dc6dcn, &
& energies, gradient, sigma, dEdcn)
#else
call atm_gradient_latp &
& (mol, trans, cutoff, par, sqrtZr4r2, c6, dc6dcn, &
& energies, gradient, sigma, dEdcn)
#endif

call mctc_gemv(dcndr, dEdcn, gradient, beta=1.0_wp)
call mctc_gemv(dcndL, dEdcn, sigma, beta=1.0_wp)
Expand All @@ -1903,7 +1915,6 @@ subroutine d4_atm_gradient_latp &

end subroutine d4_atm_gradient_latp


subroutine atm_gradient_latp &
& (mol, trans, cutoff, par, r4r2, c6, dc6dcn, &
& energies, gradient, sigma, dEdcn)
Expand Down Expand Up @@ -1996,11 +2007,140 @@ subroutine atm_gradient_latp &

end subroutine atm_gradient_latp

!> OpenACC variant of atm_gradient_latp (identical argument list); the callers
!> in this file select it instead of atm_gradient_latp when XTB_GPU is defined.
!>
!> Loops over atom triples (iat >= jat >= kat) and pairs of translation vectors,
!> evaluates each triple with deriv_atm_triple and adds the scaled results to
!> energies, gradient, sigma and dEdcn. All four output arrays are accumulated
!> into (intent(inout)), not overwritten.
subroutine atm_gradient_latp_gpu &
& (mol, trans, cutoff, par, r4r2, c6, dc6dcn, &
& energies, gradient, sigma, dEdcn)

!> Molecular structure data
type(TMolecule), intent(in) :: mol

!> Damping parameters
type(dftd_parameter), intent(in) :: par

!> Translation vectors, one per column; added to the interatomic difference
!> vectors (presumably lattice translations of the periodic cell -- confirm).
real(wp), intent(in) :: trans(:, :)
!> Per-species factors entering the pairwise critical radii below; indexed
!> with the values stored in mol%at.
real(wp), intent(in) :: r4r2(:)
!> Distance cutoff; compared in squared form against squared distances.
real(wp), intent(in) :: cutoff
!> Pairwise C6 coefficients, indexed by atom pair.
real(wp), intent(in) :: c6(:, :)
!> Derivative data for the C6 coefficients, indexed by atom pair and passed
!> through to deriv_atm_triple (name suggests d(C6)/d(CN) -- confirm).
real(wp), intent(in) :: dc6dcn(:, :)

!> Atom-resolved energies; each triple adds one third of its scaled energy
!> to each of its three atoms.
real(wp), intent(inout) :: energies(:)
!> Gradient accumulator, first index is the Cartesian component, second the atom.
real(wp), intent(inout) :: gradient(:, :)
!> 3x3 accumulator for the dS output of deriv_atm_triple.
real(wp), intent(inout) :: sigma(:, :)
!> Per-atom accumulator for the dCN output of deriv_atm_triple.
real(wp), intent(inout) :: dEdcn(:)

! Loop indices over atoms (iat, jat, kat) and translations (jtr, ktr), and
! the mol%at entries of the three atoms (ati, atj, atk).
integer :: iat, jat, kat, ati, atj, atk, jtr, ktr
real(wp) :: cutoff2
! Difference vectors of the three pairs and their squared lengths.
real(wp) :: rij(3), rjk(3), rik(3), r2ij, r2jk, r2ik
! Pairwise C6 values, pairwise radii built from par%a1/par%a2, triple weight.
real(wp) :: c6ij, c6jk, c6ik, cij, cjk, cik, scale
! Outputs of deriv_atm_triple: energy, per-atom gradient columns, 3x3 dS
! term and per-atom dCN term of a single triple.
real(wp) :: dE, dG(3, 3), dS(3, 3), dCN(3)
! NOTE(review): sr is not referenced anywhere in this routine.
real(wp), parameter :: sr = 4.0_wp/3.0_wp
integer :: mlen, k, kk

cutoff2 = cutoff**2
! Upper bound of the atom loops (len applied to the structure type;
! presumably the number of atoms -- confirm).
mlen = len(mol)

! Copy all inputs and the current contents of the accumulators to the device.
! mol is listed before its components mol%at and mol%xyz.
! NOTE(review): this transfer is issued on every call; confirm that is
! acceptable for the expected call frequency.
!$acc enter data copyin(par,trans,r4r2,c6,dc6dcn,energies,gradient,sigma,dEdcn, &
!$acc& mol,mol%at,mol%xyz)

! The small work arrays are declared private on the parallel construct;
! everything else is expected to be present on the device already.
!$acc parallel default(present) private(rij,rjk,rik,dG,dS,dCN)

! The (iat, jat) pair loops are collapsed and distributed over gangs. jat
! runs over the full range and pairs with jat > iat are skipped by the cycle
! below (presumably to keep the collapsed loop nest rectangular -- confirm).
!$acc loop gang collapse(2)
do iat = 1, mlen
do jat = 1, mlen
if (jat.gt.iat) cycle

! Third atom is limited to kat <= jat, so each unordered triple is visited once.
do kat = 1, jat
ati = mol%at(iat)
atj = mol%at(jat)

c6ij = c6(jat,iat)
cij = par%a1*sqrt(3.0_wp*r4r2(ati)*r4r2(atj))+par%a2

atk = mol%at(kat)

c6ik = c6(kat,iat)
c6jk = c6(kat,jat)

cik = par%a1*sqrt(3.0_wp*r4r2(ati)*r4r2(atk))+par%a2
cjk = par%a1*sqrt(3.0_wp*r4r2(atj)*r4r2(atk))+par%a2

! jtr translates atom jat, ktr translates atom kat; atom iat is not translated.
do jtr = 1, size(trans, dim=2)
rij = mol%xyz(:, jat) - mol%xyz(:, iat) + trans(:, jtr)
r2ij = sum(rij**2)
! Skip pairs beyond the cutoff and (numerically) coincident positions.
if (r2ij > cutoff2 .or. r2ij < 1.0e-14_wp) cycle
do ktr = 1, size(trans, dim=2)
! Same atom with the same translation: j and k would be the same site.
if (jat == kat .and. jtr == ktr) cycle
rik = mol%xyz(:, kat) - mol%xyz(:, iat) + trans(:, ktr)
r2ik = sum(rik**2)
if (r2ik > cutoff2 .or. r2ik < 1.0e-14_wp) cycle
rjk = mol%xyz(:, kat) - mol%xyz(:, jat) + trans(:, ktr) &
& - trans(:, jtr)
r2jk = sum(rjk**2)
if (r2jk > cutoff2 .or. r2jk < 1.0e-14_wp) cycle

call deriv_atm_triple(c6ij, c6ik, c6jk, cij, cjk, cik, &
& r2ij, r2jk, r2ik, dc6dcn(iat,jat), dc6dcn(jat,iat), &
& dc6dcn(jat,kat), dc6dcn(kat,jat), dc6dcn(iat,kat), &
& dc6dcn(kat,iat), rij, rjk, rik, par%alp, dE, dG, dS, dCN)

! Weight of this triple: global prefactor par%s9 times the factor
! returned by triple_scale for the index combination.
scale = par%s9 * triple_scale(iat, jat, kat)
! Every accumulation below is an atomic update: different (iat, jat)
! iterations can add to the same array element.
!$acc atomic
energies(iat) = energies(iat) + dE * scale/3
!$acc end atomic
!$acc atomic
energies(jat) = energies(jat) + dE * scale/3
!$acc end atomic
!$acc atomic
energies(kat) = energies(kat) + dE * scale/3
!$acc end atomic
! Columns 1, 2, 3 of dG are added to atoms iat, jat, kat respectively.
do k = 1,3
!$acc atomic
gradient(k, iat) = gradient(k, iat) + dG(k, 1) * scale
!$acc end atomic
!$acc atomic
gradient(k, jat) = gradient(k, jat) + dG(k, 2) * scale
!$acc end atomic
!$acc atomic
gradient(k, kat) = gradient(k, kat) + dG(k, 3) * scale
!$acc end atomic
enddo
! All triples add into the same nine sigma elements.
do k = 1,3
do kk = 1,3
!$acc atomic
sigma(k, kk) = sigma(k, kk) + dS(k, kk) * scale
!$acc end atomic
enddo
enddo
! Entries 1, 2, 3 of dCN are added to atoms iat, jat, kat respectively.
!$acc atomic
dEdcn(iat) = dEdcn(iat) + dCN(1) * scale
!$acc end atomic
!$acc atomic
dEdcn(jat) = dEdcn(jat) + dCN(2) * scale
!$acc end atomic
!$acc atomic
dEdcn(kat) = dEdcn(kat) + dCN(3) * scale
!$acc end atomic

end do
end do

end do
end do
end do
!$acc end parallel

! Copy the accumulated results back to the host, then drop the inputs.
! NOTE(review): the delete clause lists mol before its components mol%at and
! mol%xyz; confirm against the compiler documentation whether the components
! should be deleted before the parent.
!$acc exit data copyout(energies,gradient,sigma,dEdcn)
!$acc exit data delete(par,trans,r4r2,c6,dc6dcn,mol,mol%at,mol%xyz)


end subroutine atm_gradient_latp_gpu


pure subroutine deriv_atm_triple(c6ij, c6ik, c6jk, cij, cjk, cik, &
& r2ij, r2jk, r2ik, dc6ij, dc6ji, dc6jk, dc6kj, dc6ik, dc6ki, &
& rij, rjk, rik, alp, dE, dG, dS, dCN)

!$acc routine vector
real(wp), intent(in) :: c6ij, c6ik, c6jk
real(wp), intent(in) :: cij, cjk, cik
real(wp), intent(in) :: r2ij, r2jk, r2ik
Expand Down Expand Up @@ -2069,6 +2209,7 @@ end subroutine deriv_atm_triple

!> Logic exercise to distribute a triple energy to atomwise energies.
elemental function triple_scale(ii, jj, kk) result(scale)
!$acc routine seq

!> Atom indices
integer, intent(in) :: ii, jj, kk
Expand Down
2 changes: 1 addition & 1 deletion src/disp/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ srcs += files(
'dftd3.f',
'dftd3.f90',
'dftd3_parameters.f90',
'dftd4.f90',
'dftd4.F90',
'dftd4_parameters.f90',
'encharges.f90',
'ncoord.f90',
Expand Down
3 changes: 3 additions & 0 deletions src/intgrad.f90
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ module xtb_intgrad
! --------------------------------------------------------------[SAW1907]-
!> calculates a partial overlap in one cartesian direction
pure elemental function olapp(l,gama) result(s)
!$acc routine seq
implicit none
integer,intent(in) :: l
real(wp),intent(in) :: gama
Expand Down Expand Up @@ -344,6 +345,7 @@ end subroutine build_hshift

! --------------------------------------------------------------[SAW1801]-
pure subroutine build_hshift2(cfs,a,e,l)
!$acc routine seq
implicit none
integer,intent(in) :: l
real(wp), intent(in) :: a,e
Expand Down Expand Up @@ -374,6 +376,7 @@ end subroutine build_hshift2

! --------------------------------------------------------------[SAW1801]-
pure subroutine prod3(a,b,d,la,lb)
!$acc routine seq
implicit none
integer,intent(in) :: la,lb
real(wp), intent(in) :: a(*),b(*)
Expand Down
1 change: 1 addition & 0 deletions src/lin_mod.f90
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ module xtb_lin
!***********************************************************************

pure elemental integer function lin(i1,i2)
!$acc routine seq
integer,intent(in) :: i1,i2
integer :: idum1,idum2
idum1=max(i1,i2)
Expand Down
4 changes: 2 additions & 2 deletions src/mctc/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ list(APPEND srcs
"${dir}/systools.F90"
"${dir}/thresholds.f90"
"${dir}/version.f90"
"${dir}/mctc_global.f90"
"${dir}/mctc_global.F90"
"${dir}/mctc_strings.f90"
"${dir}/mctc_constants.f90"
"${dir}/mctc_param.f90"
Expand All @@ -53,7 +53,7 @@ list(APPEND srcs
"${dir}/linalg.f90"
"${dir}/lapack.f90"
"${dir}/blas.f90"
"${dir}/mctc_init.f90"
"${dir}/mctc_init.F90"
"${dir}/error.f90"
"${dir}/signal.c"
)
Expand Down
2 changes: 1 addition & 1 deletion src/mctc/lapack/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
set(dir "${CMAKE_CURRENT_SOURCE_DIR}")

list(APPEND srcs
"${dir}/eigensolve.f90"
"${dir}/eigensolve.F90"
"${dir}/geneigval.f90"
"${dir}/gst.f90"
"${dir}/stdeigval.f90"
Expand Down
Loading