From 383736e85533ae6687b6194135acd12bbd0176f7 Mon Sep 17 00:00:00 2001 From: Linnea Andersson Date: Mon, 11 Jan 2021 15:49:53 +0100 Subject: [PATCH 01/10] Added kernel routines for XL-BOMD --- src/CMakeLists.txt | 2 + src/prg_ewald_mod.F90 | 751 +++++++++++++++++++++++++++++ src/prg_implicit_fermi_mod.F90 | 326 +++++++++++-- src/prg_xlbokernel_mod.F90 | 837 +++++++++++++++++++++++++++++++++ 4 files changed, 1879 insertions(+), 37 deletions(-) create mode 100644 src/prg_ewald_mod.F90 create mode 100644 src/prg_xlbokernel_mod.F90 diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 9b6b5ce4..beaa317a 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -18,6 +18,7 @@ add_library(progress prg_chebyshev_mod.F90 prg_densitymatrix_mod.F90 prg_dos_mod.F90 + prg_ewald_mod.F90 prg_extras_mod.F90 prg_genz_mod.F90 prg_graph_mod.F90 @@ -47,6 +48,7 @@ add_library(progress prg_system_mod.F90 prg_timer_mod.F90 prg_xlbo_mod.F90 + prg_xlbokernel_mod.F90 prg_xlkernel_mod.F90) if(OPENMP_FOUND) diff --git a/src/prg_ewald_mod.F90 b/src/prg_ewald_mod.F90 new file mode 100644 index 00000000..5a032412 --- /dev/null +++ b/src/prg_ewald_mod.F90 @@ -0,0 +1,751 @@ +! Ewald sum routines for kernel calculation +module prg_ewald_mod + + use bml + use prg_timer_mod + use prg_parallel_mod + + implicit none + + private !Everything is private by default + + integer, parameter :: dp = kind(1.0d0) + + public :: Ewald_Real_Space_Single + public :: Ewald_Real_Space_Single_latte + public :: Ewald_Real_Space + public :: Ewald_Real_Space_latte + public :: Ewald_k_space + public :: Ewald_k_space_latte + +contains + +!> Find Coulomb potential on site I from single charge at site J +subroutine Ewald_Real_Space_Single_latte(COULOMBV,I,RXYZ,Box,Nr_elem, & + DELTAQ,J,U,Element_Pointer,Nr_atoms,COULACC,HDIM,Max_Nr_Neigh) + +implicit none + +integer, parameter :: PREC = 8 +real(PREC), parameter :: ONE = 1.D0, TWO = 2.D0, ZERO = 0.D0, SIX = 6.D0, THREE = 3.D0 +real(PREC), parameter :: FOURTYEIGHT = 48.D0, ELEVEN = 11.D0, SIXTEEN = 16.D0 +real(PREC), parameter :: pi = 3.14159265358979323846264D0 +real(PREC), parameter :: SQRTPI = 1.772453850905516D0 +integer, intent(in) :: Nr_atoms, Nr_elem, HDIM, Max_Nr_Neigh, I, J, Element_Pointer(Nr_atoms) +real(PREC), intent(in) :: COULACC, DELTAQ(Nr_atoms) +real(PREC) :: TFACT, RELPERM, KECONST +real(PREC), intent(in) :: RXYZ(3,Nr_atoms), Box(3,3) +real(PREC), intent(in) :: U(Nr_elem) +real(PREC) :: COULCUT, COULCUT2 +real(PREC), intent(out) :: COULOMBV +real(PREC) :: Ra(3), Rb(3), dR, Rab(3) +real(PREC) :: COULVOL, SQRTX, CALPHA, DC(3), MAGR, MAGR2, Z +real(PREC) :: CALPHA2, TI,TI2,TI3,TI4,TI6,SSA,SSB,SSC,SSD,SSE +real(PREC) :: NUMREP_ERFC, CA, FORCE, EXPTI, EXPTJ +real(PREC) :: TJ,TJ2,TJ3,TJ4,TJ6,TI2MTJ2A,SA,SB,SC,SD,SE,SF +real(PREC) :: TI2MTJ2, TI2MTI2, TJ2MTI2 + +integer :: K, ccnt,l,m,n + +COULVOL = Box(1,1)*Box(2,2)*Box(3,3) +SQRTX = sqrt(-log(COULACC)) + +ccnt = 0 +COULCUT = 12.D0 +CALPHA = SQRTX/COULCUT +COULCUT2 = COULCUT*COULCUT +CALPHA2 = CALPHA*CALPHA + +RELPERM = ONE +KECONST = 14.3996437701414D0*RELPERM +TFACT = 16.0D0/(5.0D0*KECONST) + +COULOMBV = ZERO + +TI = TFACT*U(Element_Pointer(I)) +TI2 = TI*TI +TI3 = TI2*TI +TI4 = TI2*TI2 +TI6 = TI4*TI2 + +SSA = TI +SSB = TI3/48.D0 +SSC = 3.D0*TI2/16.D0 +SSD = 11.D0*TI/16.D0 +SSE = 1.D0 + +Ra(1) = RXYZ(1,I) +Ra(2) = RXYZ(2,I) +Ra(3) = RXYZ(3,I) + + do k = -1,1 + do m = -1,1 + do l = -1,1 + + Rb(1) = RXYZ(1,J)+k*box(1,1) + Rb(2) = RXYZ(2,J)+m*box(2,2) + Rb(3) = RXYZ(3,J)+l*box(3,3) + Rab = Rb-Ra ! OBS b - a !!! + dR = norm2(Rab) + MAGR = dR + MAGR2 = dR*dR + + if ((dR <= COULCUT).and.(dR > 1e-12)) then + + TJ = TFACT*U(Element_Pointer(J)) + DC = Rab/dR + + Z = abs(CALPHA*MAGR) + NUMREP_ERFC = erfc(Z) + + CA = NUMREP_ERFC/MAGR + COULOMBV = COULOMBV + DELTAQ(J)*CA + ccnt = ccnt + 1 + CA = CA + TWO*CALPHA*exp( -CALPHA2*MAGR2 )/SQRTPI + EXPTI = exp(-TI*MAGR ) + + if (Element_Pointer(I).eq.Element_Pointer(J)) then + COULOMBV = COULOMBV - DELTAQ(J)*EXPTI*(SSB*MAGR2 + SSC*MAGR + SSD + SSE/MAGR) + ccnt = ccnt + 1 + else + TJ2 = TJ*TJ + TJ3 = TJ2*TJ + TJ4 = TJ2*TJ2 + TJ6 = TJ4*TJ2 + EXPTJ = exp( -TJ*MAGR ) + TI2MTJ2 = TI2 - TJ2 + TJ2MTI2 = -TI2MTJ2 + SA = TI + SB = TJ4*TI/(TWO*TI2MTJ2*TI2MTJ2) + SC = (TJ6 - THREE*TJ4*TI2)/(TI2MTJ2*TI2MTJ2*TI2MTJ2) + SD = TJ + SE = TI4*TJ/(TWO * TJ2MTI2 * TJ2MTI2) + SF = (TI6 - THREE*TI4*TJ2)/(TJ2MTI2*TJ2MTI2*TJ2MTI2) + + COULOMBV = COULOMBV - (DELTAQ(J)*(EXPTI*(SB - (SC/MAGR)) + EXPTJ*(SE - (SF/MAGR)))) + endif + endif + enddo + enddo + enddo + +COULOMBV = KECONST*COULOMBV + +end subroutine Ewald_Real_Space_Single_latte + +subroutine Ewald_Real_Space_Single(COULOMBV,FCOUL,I,RX,RY,RZ,LBox, & + DELTAQ,J,U,Element_Type,Nr_atoms,COULACC,TIMERATIO,HDIM,Max_Nr_Neigh) + + +implicit none + +integer, parameter :: PREC = 8 +real(PREC), parameter :: ONE = 1.D0, TWO = 2.D0, ZERO = 0.D0, SIX = 6.D0, THREE = 3.D0 +real(PREC), parameter :: FOURTYEIGHT = 48.D0, ELEVEN = 11.D0, SIXTEEN = 16.D0 +real(PREC), parameter :: pi = 3.14159265358979323846264D0 +real(PREC), parameter :: SQRTPI = 1.772453850905516D0 +integer, intent(in) :: Nr_atoms, HDIM, Max_Nr_Neigh, I, J +real(PREC), intent(in) :: COULACC, TIMERATIO,DELTAQ(Nr_atoms) +real(PREC) :: TFACT, RELPERM, KECONST +real(PREC), intent(in) :: RX(Nr_atoms), RY(Nr_atoms), RZ(Nr_atoms), LBox(3) +real(PREC), intent(in) :: U(Nr_atoms) +real(PREC) :: COULCUT, COULCUT2 +character(10), intent(in) :: Element_Type(Nr_atoms) +real(PREC), intent(out) :: COULOMBV, FCOUL(3) +real(PREC) :: Ra(3), Rb(3), dR, Rab(3) +real(PREC) :: COULVOL, SQRTX, CALPHA, DC(3), MAGR, MAGR2, Z +real(PREC) :: CALPHA2, TI,TI2,TI3,TI4,TI6,SSA,SSB,SSC,SSD,SSE +real(PREC) :: NUMREP_ERFC, CA, FORCE, EXPTI, EXPTJ +real(PREC) :: TJ,TJ2,TJ3,TJ4,TJ6,TI2MTJ2A,SA,SB,SC,SD,SE,SF +real(PREC) :: TI2MTJ2, TI2MTI2, TJ2MTI2 + +integer :: K, ccnt,l,m,n + +COULVOL = LBox(1)*LBox(2)*LBox(3) +SQRTX = sqrt(-log(COULACC)) + +ccnt = 0 +COULCUT = 12.D0 +CALPHA = SQRTX/COULCUT +COULCUT2 = COULCUT*COULCUT +CALPHA2 = CALPHA*CALPHA + +RELPERM = ONE +KECONST = 14.3996437701414D0*RELPERM +TFACT = 16.0D0/(5.0D0*KECONST) + +FCOUL = ZERO +COULOMBV = ZERO + +TI = TFACT*U(I) +TI2 = TI*TI +TI3 = TI2*TI +TI4 = TI2*TI2 +TI6 = TI4*TI2 + +SSA = TI +SSB = TI3/48.D0 +SSC = 3.D0*TI2/16.D0 +SSD = 11.D0*TI/16.D0 +SSE = 1.D0 + +Ra(1) = RX(I) +Ra(2) = RY(I) +Ra(3) = RZ(I) + + do k = -1,1 + do m = -1,1 + do l = -1,1 + + Rb(1) = RX(J)+k*Lbox(1) + Rb(2) = RY(J)+m*Lbox(2) + Rb(3) = RZ(J)+l*Lbox(3) + Rab = Rb-Ra ! OBS b - a !!! + dR = norm2(Rab) + MAGR = dR + MAGR2 = dR*dR + + if ((dR <= COULCUT).and.(dR > 1e-12)) then + + TJ = TFACT*U(J) + DC = Rab/dR + + Z = abs(CALPHA*MAGR) + NUMREP_ERFC = erfc(Z) + + CA = NUMREP_ERFC/MAGR + COULOMBV = COULOMBV + DELTAQ(J)*CA + ccnt = ccnt + 1 + CA = CA + TWO*CALPHA*exp( -CALPHA2*MAGR2 )/SQRTPI + FORCE = -KECONST*DELTAQ(I)*DELTAQ(J)*CA/MAGR + EXPTI = exp(-TI*MAGR ) + + if (Element_Type(I).eq.Element_Type(J)) then + COULOMBV = COULOMBV - DELTAQ(J)*EXPTI*(SSB*MAGR2 + SSC*MAGR + SSD + SSE/MAGR) + ccnt = ccnt + 1 + FORCE = FORCE + (KECONST*DELTAQ(I)*DELTAQ(J)*EXPTI)*((SSE/MAGR2 - TWO*SSB*MAGR - SSC) & + + SSA*(SSB*MAGR2 + SSC*MAGR + SSD + SSE/MAGR)) + else + TJ2 = TJ*TJ + TJ3 = TJ2*TJ + TJ4 = TJ2*TJ2 + TJ6 = TJ4*TJ2 + EXPTJ = exp( -TJ*MAGR ) + TI2MTJ2 = TI2 - TJ2 + TJ2MTI2 = -TI2MTJ2 + SA = TI + SB = TJ4*TI/(TWO*TI2MTJ2*TI2MTJ2) + SC = (TJ6 - THREE*TJ4*TI2)/(TI2MTJ2*TI2MTJ2*TI2MTJ2) + SD = TJ + SE = TI4*TJ/(TWO * TJ2MTI2 * TJ2MTI2) + SF = (TI6 - THREE*TI4*TJ2)/(TJ2MTI2*TJ2MTI2*TJ2MTI2) + + COULOMBV = COULOMBV - (DELTAQ(J)*(EXPTI*(SB - (SC/MAGR)) + EXPTJ*(SE - (SF/MAGR)))) + FORCE = FORCE + KECONST*DELTAQ(I)*DELTAQ(J)*((EXPTI*(SA*(SB - (SC/MAGR)) - (SC/MAGR2))) & + + (EXPTJ*(SD*(SE - (SF/MAGR)) - (SF/MAGR2)))) + endif + + FCOUL(1) = FCOUL(1) + DC(1)*FORCE + FCOUL(2) = FCOUL(2) + DC(2)*FORCE + FCOUL(3) = FCOUL(3) + DC(3)*FORCE + endif + enddo + enddo + enddo + +COULOMBV = KECONST*COULOMBV + +end subroutine Ewald_Real_Space_Single + +subroutine Ewald_Real_Space_latte(COULOMBV,I,RXYZ,Box, & + DELTAQ,U,Element_Pointer,Nr_atoms,COULACC,nebcoul,totnebcoul,HDIM,Max_Nr_Neigh,Nr_Elem) + +implicit none + +integer, parameter :: PREC = 8 +real(PREC), parameter :: ONE = 1.D0, TWO = 2.D0, ZERO = 0.D0, SIX = 6.D0, THREE = 3.D0 +real(PREC), parameter :: FOURTYEIGHT = 48.D0, ELEVEN = 11.D0, SIXTEEN = 16.D0 +real(PREC), parameter :: pi = 3.14159265358979323846264D0 +real(PREC), parameter :: SQRTPI = 1.772453850905516D0 +integer, intent(in) :: Nr_atoms, HDIM, Max_Nr_Neigh, I, Nr_Elem +real(PREC), intent(in) :: COULACC +real(PREC) :: TFACT, RELPERM, KECONST +real(PREC), intent(in) :: RXYZ(3,Nr_atoms), Box(3,3), DELTAQ(Nr_atoms) +real(PREC), intent(in) :: U(Nr_elem) +real(PREC) :: COULCUT, COULCUT2 +integer, intent(in) :: Element_Pointer(Nr_atoms) +integer, intent(in) :: totnebcoul(Nr_atoms), nebcoul(4,Max_Nr_Neigh,Nr_atoms) +real(PREC), intent(out) :: COULOMBV +real(PREC) :: Ra(3), Rb(3), dR, Rab(3) +real(PREC) :: COULVOL, SQRTX, CALPHA, DC(3), MAGR, MAGR2, Z +real(PREC) :: CALPHA2, TI,TI2,TI3,TI4,TI6,SSA,SSB,SSC,SSD,SSE +real(PREC) :: NUMREP_ERFC, CA, FORCE, EXPTI, EXPTJ +real(PREC) :: TJ,TJ2,TJ3,TJ4,TJ6,TI2MTJ2A,SA,SB,SC,SD,SE,SF +real(PREC) :: TI2MTJ2, TI2MTI2, TJ2MTI2 +integer :: J,K, ccnt, newj, PBCI,PBCJ,PBCK + +COULVOL = Box(1,1)*Box(2,2)*Box(3,3) +SQRTX = sqrt(-log(COULACC)) + +ccnt = 0 +COULCUT = 12.D0 +CALPHA = SQRTX/COULCUT +COULCUT2 = COULCUT*COULCUT +CALPHA2 = CALPHA*CALPHA + +RELPERM = ONE +KECONST = 14.3996437701414D0*RELPERM +TFACT = 16.0D0/(5.0D0*KECONST) + +COULOMBV = ZERO + +TI = TFACT*U(Element_Pointer(I)) +TI2 = TI*TI +TI3 = TI2*TI +TI4 = TI2*TI2 +TI6 = TI4*TI2 + +SSA = TI +SSB = TI3/48.D0 +SSC = 3.D0*TI2/16.D0 +SSD = 11.D0*TI/16.D0 +SSE = 1.D0 + +Ra(1) = RXYZ(1,I) +Ra(2) = RXYZ(2,I) +Ra(3) = RXYZ(3,I) + +do newj = 1,totnebcoul(I) + J = NEBCOUL(1, NEWJ, I) + PBCI = NEBCOUL(2, NEWJ, I) + PBCJ = NEBCOUL(3, NEWJ, I) + PBCK = NEBCOUL(4, NEWJ, I) + Rb(1) = RXYZ(1,J) + REAL(PBCI)*BOX(1,1) + REAL(PBCJ)*BOX(2,1) + & + REAL(PBCK)*BOX(3,1) + + Rb(2) = RXYZ(2,J) + REAL(PBCI)*BOX(1,2) + REAL(PBCJ)*BOX(2,2) + & + REAL(PBCK)*BOX(3,2) + + Rb(3) = RXYZ(3,J) + REAL(PBCI)*BOX(1,3) + REAL(PBCJ)*BOX(2,3) + & + REAL(PBCK)*BOX(3,3) + Rab = Rb-Ra ! OBS b - a !!! + dR = norm2(Rab) + MAGR = dR + MAGR2 = dR*dR + + if ((dR <= COULCUT).and.(dR > 1e-12)) then + + TJ = TFACT*U(Element_Pointer(J)) + DC = Rab/dR + + Z = abs(CALPHA*MAGR) + NUMREP_ERFC = erfc(Z) + + CA = NUMREP_ERFC/MAGR + COULOMBV = COULOMBV + DELTAQ(J)*CA + ccnt = ccnt + 1 + CA = CA + TWO*CALPHA*exp( -CALPHA2*MAGR2 )/SQRTPI + EXPTI = exp(-TI*MAGR ) + + if (Element_Pointer(I).eq.Element_Pointer(J)) then + COULOMBV = COULOMBV - DELTAQ(J)*EXPTI*(SSB*MAGR2 + SSC*MAGR + SSD + SSE/MAGR) + ccnt = ccnt + 1 + else + TJ2 = TJ*TJ + TJ3 = TJ2*TJ + TJ4 = TJ2*TJ2 + TJ6 = TJ4*TJ2 + EXPTJ = exp( -TJ*MAGR ) + TI2MTJ2 = TI2 - TJ2 + TJ2MTI2 = -TI2MTJ2 + SA = TI + SB = TJ4*TI/(TWO*TI2MTJ2*TI2MTJ2) + SC = (TJ6 - THREE*TJ4*TI2)/(TI2MTJ2*TI2MTJ2*TI2MTJ2) + SD = TJ + SE = TI4*TJ/(TWO * TJ2MTI2 * TJ2MTI2) + SF = (TI6 - THREE*TI4*TJ2)/(TJ2MTI2*TJ2MTI2*TJ2MTI2) + + COULOMBV = COULOMBV - (DELTAQ(J)*(EXPTI*(SB - (SC/MAGR)) + EXPTJ*(SE - (SF/MAGR)))) + endif + + endif +enddo +COULOMBV = KECONST*COULOMBV + +end subroutine Ewald_Real_Space_latte + +subroutine Ewald_Real_Space(COULOMBV,FCOUL,I,RX,RY,RZ,LBox, & + DELTAQ,U,Element_Type,Nr_atoms,COULACC,TIMERATIO,nnRx,nnRy,nnRz,nrnnlist,nnType,HDIM,Max_Nr_Neigh) + +implicit none + +integer, parameter :: PREC = 8 +real(PREC), parameter :: ONE = 1.D0, TWO = 2.D0, ZERO = 0.D0, SIX = 6.D0, THREE = 3.D0 +real(PREC), parameter :: FOURTYEIGHT = 48.D0, ELEVEN = 11.D0, SIXTEEN = 16.D0 +real(PREC), parameter :: pi = 3.14159265358979323846264D0 +real(PREC), parameter :: SQRTPI = 1.772453850905516D0 +integer, intent(in) :: Nr_atoms, HDIM, Max_Nr_Neigh, I +real(PREC), intent(in) :: COULACC, TIMERATIO +real(PREC) :: TFACT, RELPERM, KECONST +real(PREC), intent(in) :: RX(Nr_atoms), RY(Nr_atoms), RZ(Nr_atoms), LBox(3), DELTAQ(Nr_atoms) +real(PREC), intent(in) :: U(Nr_atoms) +real(PREC) :: COULCUT, COULCUT2 +character(10), intent(in) :: Element_Type(Nr_atoms) +integer, intent(in) :: nrnnlist(Nr_atoms), nnType(Nr_atoms,Max_Nr_Neigh) +real(PREC), intent(in) :: nnRx(Nr_atoms,Max_Nr_Neigh) +real(PREC), intent(in) :: nnRy(Nr_atoms,Max_Nr_Neigh), nnRz(Nr_atoms,Max_Nr_Neigh) +real(PREC), intent(out) :: COULOMBV, FCOUL(3) +real(PREC) :: Ra(3), Rb(3), dR, Rab(3) +real(PREC) :: COULVOL, SQRTX, CALPHA, DC(3), MAGR, MAGR2, Z +real(PREC) :: CALPHA2, TI,TI2,TI3,TI4,TI6,SSA,SSB,SSC,SSD,SSE +real(PREC) :: NUMREP_ERFC, CA, FORCE, EXPTI, EXPTJ +real(PREC) :: TJ,TJ2,TJ3,TJ4,TJ6,TI2MTJ2A,SA,SB,SC,SD,SE,SF +real(PREC) :: TI2MTJ2, TI2MTI2, TJ2MTI2 +integer :: J,K, ccnt, nnI + +COULVOL = LBox(1)*LBox(2)*LBox(3) +SQRTX = sqrt(-log(COULACC)) + +ccnt = 0 +COULCUT = 12.D0 +CALPHA = SQRTX/COULCUT +COULCUT2 = COULCUT*COULCUT +CALPHA2 = CALPHA*CALPHA + +RELPERM = ONE +KECONST = 14.3996437701414D0*RELPERM +TFACT = 16.0D0/(5.0D0*KECONST) + +FCOUL = ZERO +COULOMBV = ZERO + +TI = TFACT*U(I) +TI2 = TI*TI +TI3 = TI2*TI +TI4 = TI2*TI2 +TI6 = TI4*TI2 + +SSA = TI +SSB = TI3/48.D0 +SSC = 3.D0*TI2/16.D0 +SSD = 11.D0*TI/16.D0 +SSE = 1.D0 + +Ra(1) = RX(I) +Ra(2) = RY(I) +Ra(3) = RZ(I) + +do nnI = 1,nrnnlist(I) + Rb(1) = nnRx(I,nnI) + Rb(2) = nnRy(I,nnI) + Rb(3) = nnRz(I,nnI) + J = nnType(I,nnI) + Rab = Rb-Ra ! OBS b - a !!! + dR = norm2(Rab) + MAGR = dR + MAGR2 = dR*dR + + if ((dR <= COULCUT).and.(dR > 1e-12)) then + + TJ = TFACT*U(J) + DC = Rab/dR + + ! Not Using Numerical Recipes ERFC + Z = abs(CALPHA*MAGR) + NUMREP_ERFC = erfc(Z) + + CA = NUMREP_ERFC/MAGR + COULOMBV = COULOMBV + DELTAQ(J)*CA + ccnt = ccnt + 1 + !TEST(ccnt) = DELTAQ(J)*CA + CA = CA + TWO*CALPHA*exp( -CALPHA2*MAGR2 )/SQRTPI + FORCE = -KECONST*DELTAQ(I)*DELTAQ(J)*CA/MAGR + EXPTI = exp(-TI*MAGR ) + + if (Element_Type(I).eq.Element_Type(J)) then + COULOMBV = COULOMBV - DELTAQ(J)*EXPTI*(SSB*MAGR2 + SSC*MAGR + SSD + SSE/MAGR) + ccnt = ccnt + 1 + !TEST(ccnt) = - DELTAQ(J)*EXPTI*(SSB*MAGR2 + SSC*MAGR + SSD + SSE/MAGR) + FORCE = FORCE + (KECONST*DELTAQ(I)*DELTAQ(J)*EXPTI)*((SSE/MAGR2 - TWO*SSB*MAGR - SSC) & + + SSA*(SSB*MAGR2 + SSC*MAGR + SSD + SSE/MAGR)) + else + TJ2 = TJ*TJ + TJ3 = TJ2*TJ + TJ4 = TJ2*TJ2 + TJ6 = TJ4*TJ2 + EXPTJ = exp( -TJ*MAGR ) + TI2MTJ2 = TI2 - TJ2 + TJ2MTI2 = -TI2MTJ2 + SA = TI + SB = TJ4*TI/(TWO*TI2MTJ2*TI2MTJ2) + SC = (TJ6 - THREE*TJ4*TI2)/(TI2MTJ2*TI2MTJ2*TI2MTJ2) + SD = TJ + SE = TI4*TJ/(TWO * TJ2MTI2 * TJ2MTI2) + SF = (TI6 - THREE*TI4*TJ2)/(TJ2MTI2*TJ2MTI2*TJ2MTI2) + + COULOMBV = COULOMBV - (DELTAQ(J)*(EXPTI*(SB - (SC/MAGR)) + EXPTJ*(SE - (SF/MAGR)))) + FORCE = FORCE + KECONST*DELTAQ(I)*DELTAQ(J)*((EXPTI*(SA*(SB - (SC/MAGR)) - (SC/MAGR2))) & + + (EXPTJ*(SD*(SE - (SF/MAGR)) - (SF/MAGR2)))) + endif + + FCOUL(1) = FCOUL(1) + DC(1)*FORCE + FCOUL(2) = FCOUL(2) + DC(2)*FORCE + FCOUL(3) = FCOUL(3) + DC(3)*FORCE + endif +enddo +COULOMBV = KECONST*COULOMBV + +end subroutine Ewald_Real_Space + +subroutine Ewald_k_Space_latte(COULOMBV,RXYZ,Box,DELTAQ,Nr_atoms,COULACC,Max_Nr_Neigh) + +implicit none + +integer, parameter :: PREC = 8 +real(PREC), parameter :: ONE = 1.D0, TWO = 2.D0, ZERO = 0.D0, SIX = 6.D0, THREE = 3.D0, FOUR = 4.D0 +real(PREC), parameter :: FOURTYEIGHT = 48.D0, ELEVEN = 11.D0, SIXTEEN = 16.D0, EIGHT = 8.D0 +real(PREC), parameter :: pi = 3.14159265358979323846264D0 +real(PREC), parameter :: SQRTPI = 1.772453850905516D0 +integer, intent(in) :: Nr_atoms, Max_Nr_Neigh +real(PREC), intent(in) :: COULACC +real(PREC) :: KECONST, TFACT, RELPERM +real(PREC), intent(in) :: RXYZ(3,Nr_atoms), Box(3,3), DELTAQ(Nr_atoms) +real(PREC) :: COULCUT, COULCUT2 +real(PREC), intent(out) :: COULOMBV(Nr_atoms) +real(PREC) :: Ra(3), Rb(3), dR, Rab(3) +real(PREC) :: COULVOL, SQRTX, CALPHA, DC(3), MAGR, MAGR2, Z +real(PREC) :: CALPHA2, TI,TI2,TI3,TI4,TI6,SSA,SSB,SSC,SSD,SSE +real(PREC) :: CORRFACT,FOURCALPHA2, FORCE +real(PREC) :: RECIPVECS(3,3),SINLIST(Nr_atoms),COSLIST(Nr_Atoms) +real(PREC) :: K(3),L11,L12,L13,M21,M22,M23,K2,KCUTOFF,KCUTOFF2,PREFACTOR +real(PREC) :: PREVIR, COSSUM,SINSUM,DOT, KEPREF, COSSUM2, SINSUM2 + +integer :: I,J,L,M,N, ccnt, nnI, LMAX,MMAX,NMAX,NMIN,MMIN + +COULVOL = Box(1,1)*Box(2,2)*Box(3,3) +SQRTX = sqrt(-log(COULACC)) + +ccnt = 0 + +COULCUT = 12.0D0 +CALPHA = SQRTX/COULCUT + +COULCUT2 = COULCUT*COULCUT +KCUTOFF = TWO*CALPHA*SQRTX +KCUTOFF2 = KCUTOFF*KCUTOFF +CALPHA2 = CALPHA*CALPHA +FOURCALPHA2 = FOUR*CALPHA2 + +RECIPVECS = ZERO +RECIPVECS(1,1) = TWO*pi/Box(1,1) +RECIPVECS(2,2) = TWO*pi/Box(2,2) +RECIPVECS(3,3) = TWO*pi/Box(3,3) +LMAX = floor(KCUTOFF / sqrt(RECIPVECS(1,1)*RECIPVECS(1,1))) +MMAX = floor(KCUTOFF / sqrt(RECIPVECS(2,2)*RECIPVECS(2,2))) +NMAX = floor(KCUTOFF / sqrt(RECIPVECS(3,3)*RECIPVECS(3,3))) + +RELPERM = 1.D0 +KECONST = 14.3996437701414D0*RELPERM + +COULOMBV = ZERO +SINLIST = ZERO +COSLIST = ZERO + +do L = 0,LMAX + + if (L.eq.0) then + MMIN = 0 + else + MMIN = -MMAX + endif + + L11 = L*RECIPVECS(1,1) + L12 = L*RECIPVECS(1,2) + L13 = L*RECIPVECS(1,3) + + do M = MMIN,MMAX + + NMIN = -NMAX + if ((L==0).and.(M==0)) then + NMIN = 1 + endif + + M21 = L11 + M*RECIPVECS(2,1) + M22 = L12 + M*RECIPVECS(2,2) + M23 = L13 + M*RECIPVECS(2,3) + + do N = NMIN,NMAX + K(1) = M21 + N*RECIPVECS(3,1) + K(2) = M22 + N*RECIPVECS(3,2) + K(3) = M23 + N*RECIPVECS(3,3) + K2 = K(1)*K(1) + K(2)*K(2) + K(3)*K(3) + if (K2.le.KCUTOFF2) then + PREFACTOR = EIGHT*pi*exp(-K2/(4.D0*CALPHA2))/(COULVOL*K2) + PREVIR = (2.D0/K2) + (2.D0/(4.D0*CALPHA2)); + + COSSUM = 0.D0 + SINSUM = 0.D0 + + ! Doing the sin and cos sums + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(I,DOT) & + !$OMP REDUCTION(+:COSSUM) & + !$OMP REDUCTION(+:SINSUM) + do I = 1,Nr_atoms + DOT = K(1)*RXYZ(1,I) + K(2)*RXYZ(2,I) + K(3)*RXYZ(3,I) + ! We re-use these in the next loop... + SINLIST(I) = sin(DOT) + COSLIST(I) = cos(DOT) + COSSUM = COSSUM + DELTAQ(I)*COSLIST(I) + SINSUM = SINSUM + DELTAQ(I)*SINLIST(I) + enddo + !$OMP END PARALLEL DO + COSSUM2 = COSSUM*COSSUM + SINSUM2 = SINSUM*SINSUM + + ! Add up energy and force contributions + + KEPREF = KECONST*PREFACTOR + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(I) + do I = 1,Nr_atoms + COULOMBV(I) = COULOMBV(I) + KEPREF*(COSLIST(I)*COSSUM + SINLIST(I)*SINSUM) + enddo + !$OMP END PARALLEL DO + + KEPREF = KEPREF*(COSSUM2 + SINSUM2) + endif + enddo + enddo +enddo + +! Point self energy +CORRFACT = 2.D0*KECONST*CALPHA/SQRTPI; +COULOMBV = COULOMBV - CORRFACT*DELTAQ; + +end subroutine Ewald_k_Space_latte + +subroutine Ewald_k_Space(COULOMBV,FCOUL,RX,RY,RZ,LBox,DELTAQ,Nr_atoms,COULACC,TIMERATIO,Max_Nr_Neigh) + +implicit none + +integer, parameter :: PREC = 8 +real(PREC), parameter :: ONE = 1.D0, TWO = 2.D0, ZERO = 0.D0, SIX = 6.D0, THREE = 3.D0, FOUR = 4.D0 +real(PREC), parameter :: FOURTYEIGHT = 48.D0, ELEVEN = 11.D0, SIXTEEN = 16.D0, EIGHT = 8.D0 +real(PREC), parameter :: pi = 3.14159265358979323846264D0 +real(PREC), parameter :: SQRTPI = 1.772453850905516D0 +integer, intent(in) :: Nr_atoms, Max_Nr_Neigh +real(PREC), intent(in) :: COULACC, TIMERATIO +real(PREC) :: KECONST, TFACT, RELPERM +real(PREC), intent(in) :: RX(Nr_atoms), RY(Nr_atoms), RZ(Nr_atoms), LBox(3), DELTAQ(Nr_atoms) +real(PREC) :: COULCUT, COULCUT2 +real(PREC), intent(out) :: COULOMBV(Nr_atoms), FCOUL(3,Nr_atoms) +real(PREC) :: Ra(3), Rb(3), dR, Rab(3) +real(PREC) :: COULVOL, SQRTX, CALPHA, DC(3), MAGR, MAGR2, Z +real(PREC) :: CALPHA2, TI,TI2,TI3,TI4,TI6,SSA,SSB,SSC,SSD,SSE +real(PREC) :: CORRFACT,FOURCALPHA2, FORCE +real(PREC) :: RECIPVECS(3,3),SINLIST(Nr_atoms),COSLIST(Nr_Atoms) +real(PREC) :: K(3),L11,L12,L13,M21,M22,M23,K2,KCUTOFF,KCUTOFF2,PREFACTOR +real(PREC) :: PREVIR, COSSUM,SINSUM,DOT, KEPREF, COSSUM2, SINSUM2 + +integer :: I,J,L,M,N, ccnt, nnI, LMAX,MMAX,NMAX,NMIN,MMIN + +COULVOL = LBox(1)*LBox(2)*LBox(3) +SQRTX = sqrt(-log(COULACC)) + +ccnt = 0 + +COULCUT = 12.0D0 +CALPHA = SQRTX/COULCUT + +COULCUT2 = COULCUT*COULCUT +KCUTOFF = TWO*CALPHA*SQRTX +KCUTOFF2 = KCUTOFF*KCUTOFF +CALPHA2 = CALPHA*CALPHA +FOURCALPHA2 = FOUR*CALPHA2 + +RECIPVECS = ZERO +RECIPVECS(1,1) = TWO*pi/LBox(1) +RECIPVECS(2,2) = TWO*pi/LBox(2) +RECIPVECS(3,3) = TWO*pi/LBox(3) +LMAX = floor(KCUTOFF / sqrt(RECIPVECS(1,1)*RECIPVECS(1,1))) +MMAX = floor(KCUTOFF / sqrt(RECIPVECS(2,2)*RECIPVECS(2,2))) +NMAX = floor(KCUTOFF / sqrt(RECIPVECS(3,3)*RECIPVECS(3,3))) + +RELPERM = 1.D0 +KECONST = 14.3996437701414D0*RELPERM + +FCOUL = ZERO +COULOMBV = ZERO +SINLIST = ZERO +COSLIST = ZERO + +do L = 0,LMAX + + if (L.eq.0) then + MMIN = 0 + else + MMIN = -MMAX + endif + + L11 = L*RECIPVECS(1,1) + L12 = L*RECIPVECS(1,2) + L13 = L*RECIPVECS(1,3) + + do M = MMIN,MMAX + + NMIN = -NMAX + if ((L==0).and.(M==0)) then + NMIN = 1 + endif + + M21 = L11 + M*RECIPVECS(2,1) + M22 = L12 + M*RECIPVECS(2,2) + M23 = L13 + M*RECIPVECS(2,3) + + do N = NMIN,NMAX + K(1) = M21 + N*RECIPVECS(3,1) + K(2) = M22 + N*RECIPVECS(3,2) + K(3) = M23 + N*RECIPVECS(3,3) + K2 = K(1)*K(1) + K(2)*K(2) + K(3)*K(3) + if (K2.le.KCUTOFF2) then + PREFACTOR = EIGHT*pi*exp(-K2/(4.D0*CALPHA2))/(COULVOL*K2) + PREVIR = (2.D0/K2) + (2.D0/(4.D0*CALPHA2)); + + COSSUM = 0.D0 + SINSUM = 0.D0 + + ! Doing the sin and cos sums + do I = 1,Nr_atoms + DOT = K(1)*RX(I) + K(2)*RY(I) + K(3)*RZ(I) + ! We re-use these in the next loop... + SINLIST(I) = sin(DOT) + COSLIST(I) = cos(DOT) + COSSUM = COSSUM + DELTAQ(I)*COSLIST(I) + SINSUM = SINSUM + DELTAQ(I)*SINLIST(I) + enddo + COSSUM2 = COSSUM*COSSUM + SINSUM2 = SINSUM*SINSUM + + ! Add up energy and force contributions + + KEPREF = KECONST*PREFACTOR + do I = 1,Nr_atoms + COULOMBV(I) = COULOMBV(I) + KEPREF*(COSLIST(I)*COSSUM + SINLIST(I)*SINSUM) + FORCE = KEPREF*DELTAQ(I)*(SINLIST(I)*COSSUM - COSLIST(I)*SINSUM) + FCOUL(1,I) = FCOUL(1,I) + FORCE*K(1) + FCOUL(2,I) = FCOUL(2,I) + FORCE*K(2) + FCOUL(3,I) = FCOUL(3,I) + FORCE*K(3) + enddo + + KEPREF = KEPREF*(COSSUM2 + SINSUM2) + endif + enddo + enddo +enddo + +! Point self energy +CORRFACT = 2.D0*KECONST*CALPHA/SQRTPI; +COULOMBV = COULOMBV - CORRFACT*DELTAQ; + +end subroutine Ewald_k_Space + +end module prg_ewald_mod diff --git a/src/prg_implicit_fermi_mod.F90 b/src/prg_implicit_fermi_mod.F90 index 5c30947c..2e217a98 100644 --- a/src/prg_implicit_fermi_mod.F90 +++ b/src/prg_implicit_fermi_mod.F90 @@ -5,11 +5,13 @@ !! module prg_implicit_fermi_mod + use omp_lib use bml use prg_normalize_mod use prg_densitymatrix_mod use prg_timer_mod use prg_parallel_mod + use prg_ewald_mod implicit none @@ -18,13 +20,164 @@ module prg_implicit_fermi_mod integer, parameter :: dp = kind(1.0d0) public :: prg_implicit_fermi + public :: prg_implicit_fermi_save_inverse public :: prg_implicit_fermi_zero public :: prg_test_density_matrix public :: prg_implicit_fermi_response + public :: prg_implicit_fermi_first_order_response public :: prg_finite_diff contains + !> Recursive Implicit Fermi Dirac for finite temperature. + !! \param Inv_bml Inverses generated by algorithm. + !! \param h_bml Input Hamiltonian matrix. + !! \param p_bml Output density matrix. + !! \param nsteps Number of recursion steps. + !! \param nocc Number of occupied states. + !! \param mu Shifted chemical potential + !! \param beta Input inverse temperature. + !! \param occErrLimit Occupation error limit. + !! \param threshold Threshold for multiplication. + !! \param tol Tolerance for linear system solver. + !! \param SCF_IT The current SCF iteration. + !! \param occiter Counts the total nr of DM calculations during MD. + !! See \cite{niklasson2003} + subroutine prg_implicit_fermi_save_inverse(Inv_bml, h_bml, p_bml, nsteps, nocc, & + mu, beta, occErrLimit, threshold, tol,SCF_IT, occiter) + + implicit none + + type(bml_matrix_t), intent(in) :: h_bml + type(bml_matrix_t), intent(inout) :: p_bml, Inv_bml(nsteps) + integer, intent(in) :: nsteps, SCF_IT + real(dp), intent(in) :: nocc, threshold + real(dp), intent(in) :: tol + real(dp), intent(in) :: occErrLimit, beta + real(dp), intent(inout) :: mu + integer, intent(inout) :: occiter + + type(bml_matrix_t) :: w_bml, y_bml, d_bml, aux_bml, p2_bml, I_bml, ai_bml + real(dp) :: trdPdmu, trP0, occErr, alpha + real(dp) :: cnst, ofactor, mustep + real(dp), allocatable :: trace(:), gbnd(:) + character(20) :: bml_type + integer :: N, M, i, iter, muadj, prev + + bml_type = bml_get_type(h_bml) + N = bml_get_N(h_bml) + M = bml_get_M(h_bml) + + allocate(trace(2)) + call bml_zero_matrix(bml_type, bml_element_real, dp, N, M, p2_bml) + call bml_zero_matrix(bml_type, bml_element_real, dp, N, M, d_bml) + call bml_zero_matrix(bml_type, bml_element_real, dp, N, M, w_bml) + call bml_zero_matrix(bml_type, bml_element_real, dp, N, M, aux_bml) + call bml_zero_matrix(bml_type, bml_element_real, dp, N, M, y_bml) + call bml_identity_matrix(bml_type, bml_element_real, dp, N, M, I_bml) + call bml_identity_matrix(bml_type, bml_element_real, dp, N, M, ai_bml) + + occErr = 10.0_dp + alpha = 1.0_dp + prev = 0 + iter = 0 + cnst = beta/(1.0_dp*2**(nsteps+2)) + + if (SCF_IT .eq. 1) then + ! Normalization + ! P0 = 0.5*I - cnst*(H0-mu0*I) + call bml_copy(h_bml, p_bml) + call prg_normalize_implicit_fermi(p_bml, cnst, mu) + ! Generate good starting guess for (2*(P2-P)+1)^-1 using conjugate gradient + call bml_multiply_x2(p_bml, p2_bml, threshold, trace) + ! Y = 2*(P2-P) + II + call bml_copy(p2_bml, y_bml) + call bml_add(y_bml, p_bml, 1.0_dp, -1.0_dp, threshold) + call bml_scale_add_identity(y_bml, 2.0_dp, 1.0_dp, threshold) + call prg_conjgrad(y_bml, ai_bml, I_bml, aux_bml, d_bml, w_bml, 0.0001_dp, threshold) + else + ! Otherwise use previous inverse as starting guess + call bml_copy(Inv_bml(1),ai_bml) + end if + + do while (occErr .gt. occErrLimit .or. muadj .eq. 1) + iter = iter + 1 + muadj = 0 + write(*,*) 'mu =', mu + ! Normalization + ! P0 = 0.5*I - cnst*(H0-mu0*I) + call bml_copy(h_bml, p_bml) + call prg_normalize_implicit_fermi(p_bml, cnst, mu) + + do i = 1, nsteps + call bml_multiply_x2(p_bml, p2_bml, threshold, trace) + ! Y = 2*(P2-P) + I + call bml_copy(p2_bml, y_bml) + call bml_add(y_bml, p_bml, 1.0_dp, -1.0_dp, threshold) + call bml_scale_add_identity(y_bml, 2.0_dp, 1.0_dp, threshold) + ! Find inverse ai = (2*(P2-P)+I)^-1 + !call prg_conjgrad(y_bml, ai_bml, I_bml, aux_bml, d_bml, w_bml, 0.01_dp, threshold) + call prg_newtonschulz(y_bml, ai_bml, d_bml, w_bml, aux_bml, I_bml, tol, threshold) + call bml_multiply(ai_bml, p2_bml, p_bml, 1.0_dp, 0.0_dp, threshold) + call bml_copy(ai_bml, Inv_bml(i)) ! Save inverses for use in perturbation response calculation + enddo + + trdPdmu = bml_trace(p_bml) + trP0 = trdPdmu + trdPdmu = trdPdmu - bml_sum_squares(p_bml) ! sum p(i,j)**2 + trdPdmu = beta * trdPdmu + occErr = abs(trP0 - nocc) + write(*,*) 'occerr =', nocc-trP0 + + ! If occupation error is too large, do bisection method + if (occerr > 10.0_dp) then + if (nocc-trP0 < 0.0_dp) then + if (prev .eq. 1) then + alpha = alpha/2 + endif + prev = -1 + mu = mu - alpha + else + if (prev .eq. -1) then + alpha = alpha/2 + endif + prev = 1 + mu = mu + alpha + endif + ! Otherwise do Newton + else if (occErr .gt. occErrLimit) then + mustep = (nocc -trP0)/trdPdmu + if (abs(mustep) > 1.0) then + mustep = 0.1_dp*mustep + end if + mu = mu + mustep + muadj = 1 + end if + enddo + ! Adjusting the occupation sometimes causes the perturbation calculation to not converge. + ! For now we recompute the DM one extra time if mu was adjusted. + !if (muadj .eq. 1) then + ! Adjust occupation + ! call bml_copy(p_bml, d_bml) + ! call bml_scale_add_identity(d_bml, -1.0_dp, 1.0_dp, threshold) + ! call bml_multiply(p_bml, d_bml, w_bml, 1.0_dp, 0.0_dp, threshold) + ! ofactor = ((nocc - trP0)/trdPdmu) * beta + ! call bml_add(p_bml, w_bml, 1.0_dp, ofactor, threshold) + !end if + occiter = occiter + iter + call bml_scale(2.0_dp,p_bml) + deallocate(trace) + + call bml_deallocate(p2_bml) + call bml_deallocate(w_bml) + call bml_deallocate(d_bml) + call bml_deallocate(y_bml) + call bml_deallocate(aux_bml) + call bml_deallocate(ai_bml) + call bml_deallocate(I_bml) + + end subroutine prg_implicit_fermi_save_inverse + !> Recursive Implicit Fermi Dirac for finite temperature. !! \param h_bml Input Hamiltonian matrix. !! \param p_bml Output density matrix. @@ -63,6 +216,11 @@ subroutine prg_implicit_fermi(h_bml, p_bml, nsteps, k, nocc, & N = bml_get_N(h_bml) M = bml_get_M(h_bml) + call bml_print_matrix("h_bml",h_bml,1,10,1,10) + call bml_print_matrix("p_bml",p_bml,1,10,1,10) + write(*,*) nsteps, k, nocc, & + mu, beta, method, osteps, occErrLimit, threshold, tol + !stop allocate(trace(2)) call bml_zero_matrix(bml_type, bml_element_real, dp, N, M, p2_bml) call bml_zero_matrix(bml_type, bml_element_real, dp, N, M, d_bml) @@ -106,7 +264,7 @@ subroutine prg_implicit_fermi(h_bml, p_bml, nsteps, k, nocc, & else call prg_setup_linsys(p_bml, y_bml, p2_bml, d_bml, w_bml, aux1_bml, aux2_bml, k, threshold) end if - call prg_conjgrad(y_bml, p_bml, p2_bml, d_bml, w_bml, tol, threshold) + call prg_conjgrad(y_bml, p_bml, p2_bml, d_bml, aux1_bml, w_bml, tol, threshold) enddo else write(*,*) "Doing NS" @@ -123,9 +281,9 @@ subroutine prg_implicit_fermi(h_bml, p_bml, nsteps, k, nocc, & call prg_setup_linsys(p_bml, y_bml, p2_bml, d_bml, w_bml, aux1_bml, aux2_bml, k, threshold) end if if (i .eq. 1) then - call prg_conjgrad(y_bml, ai_bml, I_bml, d_bml, w_bml, 0.9_dp, threshold) + call prg_conjgrad(y_bml, ai_bml, I_bml, aux1_bml, d_bml, w_bml, 0.9_dp, threshold) end if - call prg_newtonschulz(y_bml, ai_bml, d_bml, w_bml, tol, threshold) + call prg_newtonschulz(y_bml, ai_bml, d_bml, w_bml, aux1_bml, I_bml, tol, threshold) call bml_multiply(ai_bml, p2_bml, p_bml, 1.0_dp, 0.0_dp, threshold) enddo @@ -185,7 +343,7 @@ subroutine prg_implicit_fermi_zero(h_bml, p_bml, nsteps, mu, method, threshold, real(dp), intent(in) :: mu, threshold real(dp), intent(inout), optional :: tol - type(bml_matrix_t) :: w_bml, y_bml, d_bml, p2_bml, aux1_bml, aux2_bml, I_bml, ai_bml + type(bml_matrix_t) :: w_bml, y_bml, c_bml, d_bml, p2_bml, aux1_bml, aux2_bml, I_bml, ai_bml real(dp) :: cnst real(dp), allocatable :: trace(:), gbnd(:) character(20) :: bml_type @@ -201,6 +359,7 @@ subroutine prg_implicit_fermi_zero(h_bml, p_bml, nsteps, mu, method, threshold, call bml_zero_matrix(bml_type, bml_element_real, dp, N, M, d_bml) call bml_zero_matrix(bml_type, bml_element_real, dp, N, M, w_bml) call bml_zero_matrix(bml_type, bml_element_real, dp, N, M, y_bml) + call bml_zero_matrix(bml_type, bml_element_real, dp, N, M, c_bml) if (method .eq. 1) then call bml_identity_matrix(bml_type, bml_element_real, dp, N, M, I_bml) call bml_identity_matrix(bml_type, bml_element_real, dp, N, M, ai_bml) @@ -221,7 +380,7 @@ subroutine prg_implicit_fermi_zero(h_bml, p_bml, nsteps, mu, method, threshold, call bml_copy(p2_bml, y_bml) call bml_add(y_bml, p_bml, 1.0_dp, -1.0_dp, threshold) call bml_scale_add_identity(y_bml, 2.0_dp, 1.0_dp, threshold) - call prg_conjgrad(y_bml, p_bml, p2_bml, d_bml, w_bml, tol, threshold) + call prg_conjgrad(y_bml, p_bml, p2_bml, d_bml, w_bml, c_bml, tol, threshold) enddo else write(*,*) "Doing NS" @@ -232,9 +391,9 @@ subroutine prg_implicit_fermi_zero(h_bml, p_bml, nsteps, mu, method, threshold, call bml_add(y_bml, p_bml, 1.0_dp, -1.0_dp, threshold) call bml_scale_add_identity(y_bml, 2.0_dp, 1.0_dp, threshold) if (i .eq. 1) then - call prg_conjgrad(y_bml, ai_bml, I_bml, d_bml, w_bml, 0.9_dp, threshold) + call prg_conjgrad(y_bml, ai_bml, I_bml, c_bml, d_bml, w_bml, 0.9_dp, threshold) end if - call prg_newtonschulz(y_bml, ai_bml, d_bml, w_bml, tol, threshold) + call prg_newtonschulz(y_bml, ai_bml, d_bml, w_bml, c_bml, I_bml, tol, threshold) call bml_multiply(ai_bml, p2_bml, p_bml, 1.0_dp, 0.0_dp, threshold) enddo endif @@ -246,6 +405,7 @@ subroutine prg_implicit_fermi_zero(h_bml, p_bml, nsteps, mu, method, threshold, call bml_deallocate(w_bml) call bml_deallocate(d_bml) call bml_deallocate(y_bml) + call bml_deallocate(c_bml) if (method .eq. 1) then call bml_deallocate(ai_bml) call bml_deallocate(I_bml) @@ -253,6 +413,89 @@ subroutine prg_implicit_fermi_zero(h_bml, p_bml, nsteps, mu, method, threshold, end subroutine prg_implicit_fermi_zero + + !> Calculate first order density matrix response to perturbations using Implicit Fermi Dirac. + !! \param H0_bml Input Hamiltonian matrix. + !! \param H1_bml Input First order perturbation of H0. + !! \param P0_bml Output density matrix. + !! \param P1_bml Output First order density matrix response. + !! \param nsteps Number of recursion steps. + !! \param mu0 Shifted chemical potential. + !! \param beta Input inverse temperature. + !! \param nocc Number of occupied states. + !! \param threshold Threshold for matrix algebra. + !! See \cite{niklasson2015} + subroutine prg_implicit_fermi_first_order_response(H0_bml, H1_bml, P0_bml, P1_bml, & + Inv_bml, nsteps, mu0, beta, nocc, threshold) + + implicit none + + type(bml_matrix_t), intent(in) :: H0_bml, H1_bml, Inv_bml(nsteps) + type(bml_matrix_t), intent(inout) :: P0_bml, P1_bml + real(dp), intent(in) :: mu0, threshold + real(dp) :: mu1 + real(dp), intent(in) :: beta, nocc + integer, intent(in) :: nsteps + type(bml_matrix_t) :: B_bml, C_bml, C0_bml + character(20) :: bml_type + real(dp) :: p1_trace, dPdmu_trace, p1B_trace, mu1B, cnst + integer :: N, M, i, j, k + + bml_type = bml_get_type(H0_bml) + N = bml_get_N(H0_bml) + M = bml_get_M(H0_bml) + + call bml_zero_matrix(bml_type, bml_element_real, dp, N, M, B_bml) + call bml_zero_matrix(bml_type, bml_element_real, dp, N, M, C_bml) + call bml_zero_matrix(bml_type, bml_element_real, dp, N, M, C0_bml) + + cnst = beta/(2**(2+nsteps)) + + ! P0 = 0.5*II - cnst*(H0-mu0*II) + call bml_copy(H0_bml, P0_bml) + call prg_normalize_implicit_fermi(P0_bml, cnst, mu0) + + ! P1 = - cnst*H1 + call bml_copy(H1_bml, P1_bml) + call bml_scale(-1.0_dp*cnst, P1_bml) + do i = 1, nsteps + + ! Calculate coefficient matrices + ! C0 = P0^2 + call bml_multiply(P0_bml, P0_bml, C0_bml, 1.0_dp, 0.0_dp, threshold) + ! C = P0*P1+P1*P0, B = 2(P1 - C) + call bml_multiply(P0_bml, P1_bml, C_bml, 1.0_dp, 0.0_dp, threshold) + call bml_multiply(P1_bml, P0_bml, C_bml, 1.0_dp, 1.0_dp, threshold) + call bml_copy(P1_bml, B_bml) + call bml_add(B_bml, C_bml, 2.0_dp, -2.0_dp, threshold) + ! Get next P0 + call bml_multiply(Inv_bml(i), C0_bml, P0_bml, 1.0_dp, 0.0_dp, threshold) + ! Get next P1 + ! C = P0*P1+P1*P0 + 2(P1 -P0*P1-P1*P0)*P0(i+1) + call bml_multiply(B_bml, P0_bml, C_bml, 1.0_dp, 1.0_dp, threshold) + call bml_multiply(Inv_bml(i), C_bml, P1_bml, 1.0_dp, 0.0_dp, threshold) + enddo + + ! dPdmu = beta*P0(I-P0) + call bml_copy(P0_bml, B_bml) + call bml_scale_add_identity(B_bml, -1.0_dp, 1.0_dp, threshold) + call bml_multiply(P0_bml, B_bml, C_bml, 1.0_dp, 0.0_dp, threshold) + call bml_scale(beta,C_bml) + dPdmu_trace = bml_trace(C_bml) + p1_trace = bml_trace(P1_bml) + mu1 = - p1_trace/dPdmu_trace + if (abs(dPdmu_trace) > 1e-8) then + call bml_add(P1_bml,C_bml,1.0_dp,mu1,threshold) + endif + + call bml_deallocate(B_bml) + call bml_deallocate(C_bml) + call bml_deallocate(C0_bml) + + + end subroutine prg_implicit_fermi_first_order_response + + !> Calculate density matrix response to perturbations using Implicit Fermi Dirac. !! \param H0_bml Input Hamiltonian matrix. !! \param H1_bml, H2_bml, H3_bml Input First to third order perturbations of H0. @@ -270,7 +513,7 @@ end subroutine prg_implicit_fermi_zero !! See \cite{niklasson2015} subroutine prg_implicit_fermi_response(H0_bml, H1_bml, H2_bml, H3_bml, P0_bml, P1_bml, P2_bml, P3_bml, & nsteps, mu0, mu, beta, nocc, occ_tol, lin_tol, order, threshold) - + implicit none type(bml_matrix_t), intent(in) :: H0_bml, H1_bml, H2_bml, H3_bml @@ -279,7 +522,7 @@ subroutine prg_implicit_fermi_response(H0_bml, H1_bml, H2_bml, H3_bml, P0_bml, P real(dp), allocatable, intent(inout) :: mu(:) real(dp), intent(in) :: beta, occ_tol, lin_tol, nocc integer, intent(in) :: nsteps - type(bml_matrix_t) :: I_bml, tmp1_bml, tmp2_bml, C0_bml, T_bml, Ti_bml + type(bml_matrix_t) :: I_bml, tmp1_bml, tmp2_bml, tmp3_bml, C0_bml, T_bml, Ti_bml type(bml_matrix_t), allocatable :: B_bml(:), P_bml(:), C_bml(:), H_bml(:) real(dp), allocatable :: p_trace(:), trace(:) character(20) :: bml_type @@ -305,6 +548,7 @@ subroutine prg_implicit_fermi_response(H0_bml, H1_bml, H2_bml, H3_bml, P0_bml, P end do call bml_zero_matrix(bml_type, bml_element_real, dp, N, M, tmp1_bml) + call bml_zero_matrix(bml_type, bml_element_real, dp, N, M, tmp3_bml) call bml_zero_matrix(bml_type, bml_element_real, dp, N, M, tmp2_bml) call bml_zero_matrix(bml_type, bml_element_real, dp, N, M, C0_bml) call bml_zero_matrix(bml_type, bml_element_real, dp, N, M, T_bml) @@ -371,10 +615,10 @@ subroutine prg_implicit_fermi_response(H0_bml, H1_bml, H2_bml, H3_bml, P0_bml, P call bml_scale_add_identity(T_bml, 2.0_dp, 1.0_dp, threshold) ! Find T-inverse if (i .eq. 1) then - call prg_conjgrad(T_bml, Ti_bml, I_bml, tmp1_bml, tmp2_bml, 0.01_dp, threshold) + call prg_conjgrad(T_bml, Ti_bml, I_bml, tmp1_bml, tmp2_bml, tmp3_bml,0.01_dp, threshold) call bml_identity_matrix(bml_type, bml_element_real, dp, N, M, I_bml) end if - call prg_newtonschulz(T_bml, Ti_bml, tmp1_bml, tmp2_bml, lin_tol, threshold) + call prg_newtonschulz(T_bml, Ti_bml, tmp1_bml, tmp2_bml, tmp3_bml, I_bml, lin_tol, threshold) ! Get next P0 call bml_multiply(Ti_bml, C0_bml, P0_bml, 1.0_dp, 0.0_dp, threshold) ! Get next P1 @@ -608,31 +852,39 @@ end subroutine prg_setup_linsys !! \param tol Convergence criterion (Frobenius norm of residual matrix) !! \param threshold Threshold for matrix algebra - subroutine prg_newtonschulz(a_bml, ai_bml, r_bml, tmp_bml, tol, threshold) + subroutine prg_newtonschulz(a_bml, ai_bml, r_bml, tmp_bml, d_bml, I_bml, tol, threshold) implicit none - type(bml_matrix_t), intent(inout) :: ai_bml, r_bml, tmp_bml - type(bml_matrix_t), intent(in) :: a_bml + type(bml_matrix_t), intent(inout) :: ai_bml, r_bml, tmp_bml, d_bml + type(bml_matrix_t), intent(in) :: a_bml, I_bml real(dp), intent(in) :: threshold, tol - real(dp) :: norm - integer :: i + real(dp) :: err,prev_err,scaled_tol + integer :: i,N,N2 - norm = 1.0 + N = bml_get_N(a_bml) + err = 100000.0 i = 0 - do while(norm > tol) + N2 = N*N + scaled_tol = tol*N + do while(err > scaled_tol) + !write(*,*) 'iter = ', i call bml_copy(ai_bml, tmp_bml) call bml_multiply(a_bml, ai_bml, r_bml, 1.0_dp, 0.0_dp, threshold) call bml_scale_add_identity(r_bml, -1.0_dp, 1.0_dp, threshold) - norm = bml_fnorm(r_bml) - ! write(*,*) "norm = ", norm - if (norm < tol) then - exit - end if - call bml_multiply(tmp_bml, r_bml, ai_bml, 1.0_dp, 1.0_dp, threshold) + prev_err = err + err = bml_fnorm(r_bml) + !write(*,*) "err = ", err + !write(*,*) "prev_err = ", prev_err + if (10*prev_err < err) then + write(*,*) 'NS did not converge, calling conjugate gradient' + call prg_conjgrad(a_bml, ai_bml, I_bml, r_bml, tmp_bml, d_bml, 0.0001_dp, threshold) + else + call bml_multiply(tmp_bml, r_bml, ai_bml, 1.0_dp, 1.0_dp, threshold) + endif i = i + 1 enddo - ! write(*,*) "Number of NS iterations:", i + !write(*,*) "Number of NS iterations:", i end subroutine prg_newtonschulz ! Preconditioned CG, preconditioner inverse diagonal of A @@ -717,42 +969,42 @@ end subroutine prg_pcg !! \param w_bml Auxillary matrix !! \param cg_tol Convergence condition (OBS squared Frobenius norm of residual matrix) !! \param threshold Threshold for matrix algebra - subroutine prg_conjgrad(A_bml, p_bml, p2_bml, d_bml, w_bml, cg_tol, threshold) + subroutine prg_conjgrad(A_bml, p_bml, p2_bml, tmp_bml, d_bml, w_bml, cg_tol, threshold) implicit none - type(bml_matrix_t), intent(in) :: A_bml - type(bml_matrix_t), intent(inout) :: p_bml, p2_bml, d_bml, w_bml + type(bml_matrix_t), intent(in) :: A_bml, p2_bml + type(bml_matrix_t), intent(inout) :: p_bml, tmp_bml, d_bml, w_bml real(dp), intent(in) :: cg_tol, threshold real(dp) :: alpha, beta integer :: k real(dp) :: r_norm_old, r_norm_new - call bml_multiply(A_bml, p_bml, p2_bml, -1.0_dp, 1.0_dp, threshold) - r_norm_new = bml_sum_squares(p2_bml) + call bml_copy(p2_bml,tmp_bml) + call bml_multiply(A_bml, p_bml, tmp_bml, -1.0_dp, 1.0_dp, threshold) + r_norm_new = bml_sum_squares(tmp_bml) k = 0 do while (r_norm_new .gt. cg_tol) - ! write(*,*) r_norm_new + write(*,*) r_norm_new k = k + 1 if (k .eq. 1) then - write(*,*) r_norm_new - call bml_copy(p2_bml, d_bml) + call bml_copy(tmp_bml, d_bml) else beta = r_norm_new/r_norm_old - call bml_add(d_bml, p2_bml, beta, 1.0_dp, threshold) + call bml_add(d_bml, tmp_bml, beta, 1.0_dp, threshold) endif call bml_multiply(A_bml, d_bml, w_bml, 1.0_dp, 0.0_dp, threshold) alpha = r_norm_new/bml_trace_mult(d_bml, w_bml) call bml_add(p_bml, d_bml, 1.0_dp, alpha, threshold) - call bml_add(p2_bml, w_bml, 1.0_dp, -alpha, threshold) + call bml_add(tmp_bml, w_bml, 1.0_dp, -alpha, threshold) r_norm_old = r_norm_new - r_norm_new = bml_sum_squares(p2_bml) - if (k .gt. 50) then + r_norm_new = bml_sum_squares(tmp_bml) + if (k .gt. 500) then write(*,*) "Conjugate gradient is not converging" stop endif diff --git a/src/prg_xlbokernel_mod.F90 b/src/prg_xlbokernel_mod.F90 new file mode 100644 index 00000000..54f4ecf1 --- /dev/null +++ b/src/prg_xlbokernel_mod.F90 @@ -0,0 +1,837 @@ +!> Pre-conditioned O(N) calculation of the kernel for XL-BOMD. +!! \ingroup PROGRESS +!! \brief Here are subroutines implementing Niklasson's scheme for +!! low-rank, Krylov subspace approximation of the kernel. +module prg_xlbokernel_mod + + use omp_lib + use bml + use prg_normalize_mod + use prg_densitymatrix_mod + use prg_timer_mod + use prg_parallel_mod + use prg_ewald_mod + use prg_implicit_fermi_mod + + implicit none + + private !Everything is private by default + + integer, parameter :: dp = kind(1.0d0) + + public :: prg_kernel_multirank + public :: prg_kernel_multirank_latte + public :: prg_kernel_matrix_multirank + public :: prg_full_kernel + public :: prg_full_kernel_latte + +contains + +subroutine Invert(A,AI,N) + +implicit none +integer, parameter :: PREC = 8 +integer, intent(in) :: N +real(PREC), intent(in) :: A(N,N) +real(PREC), intent(out) :: AI(N,N) +real(PREC) :: WORK(N+N*N)!, C(N,N) +integer :: LDA, LWORK, M, INFO, IPIV(N) +integer :: I,J,K + +external DGETRF +external DGETRI + +AI = A +LDA = N +M = N +LWORK = N+N*N + +call DGETRF(M, N, AI, LDA, IPIV, INFO) +call DGETRI(N, AI, N, IPIV, WORK, LWORK, INFO) + +end subroutine Invert + +!> Compute low rank approximation of (K0*J)^(-1)*K0*(q[n]-n)(for LATTE) +!! \param KRes The low rank approximation +!! \param KK0_bml The pre-conditioner K0. +!! \param Res The residual q[n]-n +!! \param FelTol Relative error tolerance for approximation +!! \param L Number of vectors used. +!! \param LMAX Maximum nr of vectors to use. +!! \param NUMRANK Nr of vectors to use. +!! \param HO_bml, Orthogonalized Hamiltonian matrix. +!! \param mu The chemical potiential. +!! \param beta Scaled inverse temperature. +!! \param RXYZ Nuclear coordinates. +!! \param Box Box dimensions. +!! \param Hubbard_U Hubbard U list. +!! \param Element_Pointer List to keep track of elements. +!! \param Nr_atoms The number of atoms. +!! \param HDIM Hamiltonian matrix dimension. +!! \param Max_Nr_Neigh Max neighbours for Ewald. +!! \param Coulomb_acc Coulomb accuracy. +!! \param nebcoul Neighbour lists. +!! \param totnebcoul Number of neighbours list. +!! \param Hinxlist List to keep track of atomic positions in the Hamiltonian. +!! \param S_bml The S matrix. +!! \param Z_bml, The Z matrix. +!! \param Nocc Occupation. +!! \param Inv_bml, Inverses generated by prg_implicit_fermi_save_inverse. +!! \param DO_bml, D1_bml, H1_bml, Y_bml, X_bml Auxillary matrices. +!! \param m_rec Number of recursion steps. +!! \param threshold Threshold value for matrix truncation. +!! \param Nr_elem Number of elements in Hubbard list. +subroutine prg_kernel_multirank_latte(KRes,KK0_bml,Res,FelTol,L,LMAX,NUMRANK,HO_bml,mu,beta,RXYZ,Box,Hubbard_U,Element_Pointer, & + Nr_atoms,HDIM,Max_Nr_Neigh,Coulomb_acc,nebcoul,totnebcoul,Hinxlist, & + S_bml,Z_bml,Nocc,Inv_bml,H1_bml,X_bml,Y_bml,DO_bml,D1_bml,m_rec,threshold,Nr_elem) + +!! Res = q[n] - n +!! KK0 is preconditioner +!! KRes rank-L approximation of (K0*J)^(-1)*K0*(q[n]-n) with (K0*J)^(-1) as in Eq. (41) in Ref. [*] + + implicit none + integer, intent(in) :: Nr_atoms, HDIM, Nocc,Max_Nr_Neigh,m_rec,Nr_elem,NUMRANK + real(dp), parameter :: ONE = 1.D0, TWO = 2.D0, ZERO = 0.D0 + real(dp), intent(in) :: Coulomb_acc, FelTol, beta, mu + real(dp), intent(in) :: RXYZ(3,Nr_atoms), Box(3,3) + real(dp), intent(in) :: Res(Nr_atoms) + integer, intent(in) :: Hinxlist(HDIM),Element_Pointer(Nr_atoms) + real(dp), intent(in) :: Hubbard_U(Nr_elem) + type(bml_matrix_t), intent(inout) :: HO_bml, S_bml, Z_bml, Inv_bml(m_rec), KK0_bml + real(dp) :: K0Res(Nr_atoms) + type(bml_matrix_t),intent(inout) :: H1_bml, X_bml,Y_bml,DO_bml,D1_bml + real(dp), intent(in) :: threshold + integer, intent(in) :: LMAX + character(20) :: bml_type + integer, intent(in) :: totnebcoul(Nr_atoms), nebcoul(4,Max_Nr_Neigh,Nr_atoms) + real(dp) :: Coulomb_Pot_Real(Nr_atoms), Coulomb_Pot(Nr_atoms) + real(dp) :: Coulomb_Pot_Real_I, Coulomb_Pot_k(Nr_atoms),dq_dv(Nr_atoms) + real(dp) :: Coulomb_Pot_Real_dq_v(Nr_atoms), Coulomb_Pot_dq_v(Nr_atoms) + real(dp) :: dq_v(Nr_atoms) + real(dp), intent(inout) :: KRes(Nr_atoms) + integer :: I,J,K,It,N,MN + integer, intent(out) :: L + real(dp) :: Fel, proj_tmp, start, finish + real(dp) :: vi(Nr_atoms,LMAX), fi(Nr_atoms,LMAX), v(Nr_atoms) + real(dp) :: dr(Nr_Atoms), IdentRes(Nr_atoms) + real(dp), allocatable :: O(:,:),M(:,:),row1(:),row2(:),row_NA(:) + type(bml_matrix_t) :: KK0T_bml, K0Res_bml, Res_bml, ZT_bml, T_bml + + call timer_prg_init() + bml_type = bml_get_type(HO_bml) + N = bml_get_N(HO_bml) + MN = bml_get_M(HO_bml) + allocate(row1(HDIM)); allocate(row2(HDIM)); allocate(row_NA(Nr_atoms)) + call bml_zero_matrix('csr',bml_element_real,dp,Nr_atoms,Nr_atoms,KK0T_bml) + call bml_zero_matrix('csr',bml_element_real,dp,Nr_atoms,Nr_atoms,K0Res_bml) + call bml_zero_matrix('csr',bml_element_real,dp,Nr_atoms,Nr_atoms,Res_bml) + call bml_zero_matrix(bml_type,bml_element_real,dp,N,MN,ZT_bml) + + call bml_transpose(Z_bml,ZT_bml) + + ! K0Res = KK0*Res temporary for matrix-vector multiplication + call bml_set_row(Res_bml,1,Res,threshold) + call bml_transpose(KK0_bml,KK0T_bml) + call bml_multiply(Res_bml,KK0T_bml,K0Res_bml,1.0_dp,0.0_dp,threshold) + call bml_get_row(K0Res_bml,1,row_NA) + K0Res = row_NA + dr = K0Res + + I = 0 + Fel = 1.D0 + do while ((Fel > FelTol).AND.(I < (LMAX)).AND.(I < NUMRANK)) !! Fel = "Error" in Swedish + I = I + 1 + vi(:,I) = dr/norm2(dr) + do J = 1,I-1 + vi(:,I) = vi(:,I) - dot_product(vi(:,I),vi(:,J))*vi(:,J) !! Orthogonalized v_i as in Eq. (42) Ref. [*] + enddo + vi(:,I) = vi(:,I)/norm2(vi(:,I)) + v(:) = vi(:,I) ! v_i + + ! Compute H1 = H(v) + dq_v = v + call prg_timer_start(1) + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(J,Coulomb_Pot_Real_I) + do J = 1,Nr_atoms + call Ewald_Real_Space_latte(Coulomb_Pot_Real_I,J,RXYZ,Box, & + dq_v,Hubbard_U,Element_Pointer,Nr_atoms,Coulomb_acc,nebcoul,totnebcoul,HDIM,Max_Nr_Neigh,Nr_elem) + Coulomb_Pot_Real(J) = Coulomb_Pot_Real_I + enddo + !$OMP END PARALLEL DO + + call Ewald_k_Space_latte(Coulomb_Pot_k,RXYZ,Box,dq_v,Nr_atoms,Coulomb_acc,Max_Nr_Neigh) + Coulomb_Pot_dq_v = Coulomb_Pot_Real+Coulomb_Pot_k + call prg_timer_stop(1,1) + + + call bml_deallocate(H1_bml) + call bml_zero_matrix(bml_type,bml_element_real,dp,HDIM,MN,H1_bml) + + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(It,K) + do It = 1,Nr_atoms-1 + do K = Hinxlist(It)+1,Hinxlist(It+1) + row1(K) = Hubbard_U(Element_Pointer(It))*dq_v(It) + Coulomb_Pot_dq_v(It) + enddo + enddo + !$OMP END PARALLEL DO + do K = Hinxlist(Nr_atoms)+1,HDIM + row1(K) = Hubbard_U(Element_Pointer(Nr_atoms))*dq_v(Nr_atoms) + Coulomb_Pot_dq_v(Nr_atoms) + enddo + + ! H1 = 1/2(S*H1+H1*S) + call bml_set_diagonal(H1_bml,row1,threshold) + call bml_multiply(S_bml,H1_bml,X_bml,1.0_dp,0.0_dp,threshold) + call bml_multiply(H1_bml,S_bml,X_bml,0.5_dp,0.5_dp,threshold) + call bml_copy(X_bml,H1_bml) + + ! H1 = Z^T H1 Z + call bml_multiply(ZT_bml,H1_bml,X_bml,1.0_dp,0.0_dp,threshold) + call bml_multiply(X_bml,Z_bml,H1_bml,1.0_dp,0.0_dp,threshold) + + ! Compute D1 = F_FD(HO_bml + eps*H1_bml)/eps at eps = 0 + call prg_implicit_fermi_first_order_response(HO_bml,H1_bml,DO_bml,D1_bml,Inv_bml, & + m_rec, mu, beta, real(nocc,dp), threshold) + + ! D1 = Z D1 Z^T + call bml_multiply(Z_bml,D1_bml,Y_bml,2.0_dp,0.0_dp,threshold) + call bml_multiply(Y_bml,ZT_bml,D1_bml,1.0_dp,0.0_dp,threshold) + + ! Compute dq/dv + call bml_multiply(D1_bml,S_bml,X_bml, 1.0_dp,0.0_dp,threshold) + call bml_get_diagonal(X_bml,row1) + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(It,K) + do It = 1, Nr_atoms-1 + dq_dv(It) = 0 + do K = Hinxlist(It)+1,Hinxlist(It+1) + dq_dv(It) = dq_dv(It) + row1(K) + enddo + enddo + !$OMP END PARALLEL DO + dq_dv(Nr_atoms) = 0 + do K = Hinxlist(Nr_atoms)+1,HDIM + dq_dv(Nr_atoms) = dq_dv(Nr_atoms) + row1(K) + enddo + + dr = dq_dv - v + ! fi = K0(dq_dv - v) + call bml_set_row(Res_bml,1,dr,threshold) + call bml_multiply(Res_bml,KK0T_bml,K0Res_bml,1.0_dp,0.0_dp,threshold) + call bml_get_row(K0Res_bml,1,row_NA) + dr = row_NA + fi(:,I) = dr + + L = I + allocate(O(L,L), M(L,L)) + do K = 1,L + do J = 1,L + O(K,J) = dot_product(fi(:,K),fi(:,J)) ! O_KJ = < fv_i(K) | fv_i(J) > see below Eq. (31) + enddo + enddo + call Invert(O,M,L) ! M = O^(-1) + IdentRes = 0.D0*K0Res + KRes = 0.D0 + do K = 1,L + do J = 1,L + proj_tmp = M(K,J)*dot_product(fi(:,J),K0Res) + IdentRes = IdentRes + proj_tmp*fi(:,K) + KRes = KRes + proj_tmp*vi(:,K) !! KRes becomes the rank-L approximate of (K0*J)^(-1)*K0*(q[n]-n) + enddo + enddo + Fel = norm2(IdentRes-K0Res)/norm2(IdentRes) !! RELATIVE RESIDUAL ERROR ESTIMATE Eq. (48) Ref. [*] + write(*,*) '# I, L, Fel = ',I,L,Fel !! Fel goes down with L + deallocate(O, M) + + enddo + + deallocate(row1);deallocate(row2);deallocate(row_NA) + call bml_deallocate(KK0T_bml) + call bml_deallocate(K0Res_bml) + call bml_deallocate(Res_bml) + call bml_deallocate(ZT_bml) + call prg_timer_shutdown() + + end subroutine prg_kernel_multirank_latte + + ! Above routine but for development code + subroutine prg_kernel_multirank(KRes,KK0_bml,Res,FelTol,L,LMAX,HO_bml,mu,beta,RX,RY,RZ,LBox,Hubbard_U,Element_Type, & + Nr_atoms,HDIM,Max_Nr_Neigh,Coulomb_acc,TIMERATIO,nnRx,nnRy,nnRz,nrnnlist,nnType,H_INDEX_START,H_INDEX_END, & + S_bml,Z_bml,Nocc,Znuc,Inv_bml,H1_bml,X_bml,Y_bml,DO_bml,D1_bml,m_rec,threshold) + +!! Res = q[n] - n +!! KK0 is preconditioner +!! KRes rank-L approximation of (K0*J)^(-1)*K0*(q[n]-n) with (K0*J)^(-1) as in Eq. (41) in Ref. [*] + + implicit none + integer, intent(in) :: Nr_atoms, HDIM, Nocc,Max_Nr_Neigh,m_rec + real(dp), parameter :: ONE = 1.D0, TWO = 2.D0, ZERO = 0.D0 + real(dp), intent(in) :: Coulomb_acc, TIMERATIO, FelTol, beta, mu + real(dp), intent(in) :: RX(Nr_atoms), RY(Nr_atoms), RZ(Nr_atoms), LBox(3) + real(dp), intent(in) :: Res(Nr_atoms) + integer, intent(in) :: H_INDEX_START(Nr_atoms), H_INDEX_END(Nr_atoms) + real(dp), intent(in) :: Znuc(Nr_atoms), Hubbard_U(Nr_atoms) + type(bml_matrix_t), intent(in) :: HO_bml, S_bml, Z_bml, Inv_bml(m_rec), KK0_bml + real(dp) :: K0Res(Nr_atoms) + type(bml_matrix_t),intent(inout) :: H1_bml, X_bml,Y_bml,DO_bml,D1_bml + real(dp), intent(in) :: threshold + integer, intent(in) :: LMAX + character(10), intent(in) :: Element_Type(Nr_atoms) + integer, intent(in) :: nrnnlist(Nr_atoms), nnType(Nr_atoms,Max_Nr_Neigh) + real(dp), intent(in) :: nnRx(Nr_atoms,Max_Nr_Neigh) + real(dp), intent(in) :: nnRy(Nr_atoms,Max_Nr_Neigh), nnRz(Nr_atoms,Max_Nr_Neigh) + real(dp) :: Coulomb_Pot_Real(Nr_atoms), Coulomb_Pot(Nr_atoms) + real(dp) :: Coulomb_Pot_Real_I, Coulomb_Pot_k(Nr_atoms),dq_dv(Nr_atoms) + real(dp) :: Coulomb_Pot_Real_dq_v(Nr_atoms), Coulomb_Pot_dq_v(Nr_atoms) + real(dp) :: Coulomb_Force_Real_I(3),Coulomb_Force_k(3,Nr_atoms) + real(dp) :: dq_v(Nr_atoms) + real(dp), intent(inout) :: KRes(Nr_atoms) + integer :: I,J,K,It + integer, intent(out) :: L + real(dp) :: Fel, proj_tmp + real(dp) :: vi(Nr_atoms,LMAX), fi(Nr_atoms,LMAX), v(Nr_atoms) + real(dp) :: dr(Nr_Atoms), IdentRes(Nr_atoms) + real(dp), allocatable :: O(:,:),M(:,:),row1(:),row2(:),row_NA(:) + type(bml_matrix_t) :: KK0T_bml, K0Res_bml, Res_bml + + allocate(row1(HDIM)); allocate(row2(HDIM)); allocate(row_NA(Nr_atoms)) + call bml_zero_matrix("ellpack",bml_element_real,dp,Nr_atoms,Nr_atoms,KK0T_bml) + call bml_zero_matrix("ellpack",bml_element_real,dp,Nr_atoms,Nr_atoms,K0Res_bml) + call bml_zero_matrix("ellpack",bml_element_real,dp,Nr_atoms,Nr_atoms,Res_bml) + + ! K0Res = KK0*Res temporary for matrix-vector multiplication + call bml_transpose(KK0_bml,KK0T_bml) + call bml_set_row(Res_bml,1,Res,ONE*1e-14) + call bml_multiply(Res_bml,KK0T_bml,K0Res_bml,1.0_dp,0.0_dp,ONE*1e-14) + call bml_get_row(K0Res_bml,1,row_NA) + K0Res = row_NA + dr = K0Res + + I = 0 + Fel = 1.D0 + do while ((Fel > FelTol).AND.(I < (LMAX))) !! Fel = "Error" in Swedish + I = I + 1 + vi(:,I) = dr/norm2(dr) + do J = 1,I-1 + vi(:,I) = vi(:,I) - dot_product(vi(:,I),vi(:,J))*vi(:,J) !! Orthogonalized v_i as in Eq. (42) Ref. [*] + enddo + vi(:,I) = vi(:,I)/norm2(vi(:,I)) + v(:) = vi(:,I) ! v_i + + dq_v = v + + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(J,Coulomb_Pot_Real_I,Coulomb_Force_Real_I) + do J = 1,Nr_atoms + call Ewald_Real_Space(Coulomb_Pot_Real_I,Coulomb_Force_Real_I,J,RX,RY,RZ,LBox, & + dq_v,Hubbard_U,Element_Type,Nr_atoms,Coulomb_acc,TIMERATIO,nnRx,nnRy,nnRz,nrnnlist,nnType,HDIM,Max_Nr_Neigh) + Coulomb_Pot_Real(J) = Coulomb_Pot_Real_I + enddo + !$OMP END PARALLEL DO + + call Ewald_k_Space(Coulomb_Pot_k,Coulomb_Force_k,RX,RY,RZ,LBox,dq_v,Nr_atoms,Coulomb_acc,TIMERATIO,Max_Nr_Neigh) + Coulomb_Pot_dq_v = Coulomb_Pot_Real+Coulomb_Pot_k + + row1 = 0.0_dp + do J = 1,HDIM + call bml_set_row(H1_bml,J,row1,threshold) + enddo + + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(J,K) + do J = 1,Nr_atoms + do K = H_INDEX_START(J),H_INDEX_END(J) + row1(K) = Hubbard_U(J)*dq_v(J) + Coulomb_Pot_dq_v(J) + enddo + enddo + !$OMP END PARALLEL DO + + call bml_set_diagonal(H1_bml,row1,threshold) + call bml_multiply(S_bml,H1_bml,D1_bml,1.0_dp,0.0_dp,threshold) + call bml_multiply(H1_bml,S_bml,D1_bml,0.5_dp,0.5_dp,threshold) + call bml_copy(D1_bml,H1_bml) + + call bml_transpose(Z_bml,X_bml) + call bml_multiply(X_bml,H1_bml,D1_bml,1.0_dp,0.0_dp,threshold) + call bml_multiply(D1_bml,Z_bml,H1_bml,1.0_dp,0.0_dp,threshold) + + call prg_implicit_fermi_first_order_response(HO_bml,H1_bml,DO_bml,D1_bml,Inv_bml, & + m_rec, mu, beta, real(nocc,dp), threshold) + + call bml_transpose(Z_bml,X_bml) + call bml_multiply(Z_bml,D1_bml,Y_bml,2.0_dp,0.0_dp,threshold) + call bml_multiply(Y_bml,X_bml,D1_bml,1.0_dp,0.0_dp,threshold) + + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(It,K,row1,row2) + do It = 1, Nr_atoms + dq_dv(It) = 0 + do K = H_INDEX_START(It), H_INDEX_END(It) + call bml_get_row(S_bml,K,row1) + call bml_get_row(D1_bml,K,row2) + dq_dv(It) = dq_dv(It) + dot_product(row1,row2) + enddo + enddo + !$OMP END PARALLEL DO + + dr = dq_dv - v + ! fi = K0(dq_dv - v) + call bml_set_row(Res_bml,1,dr,threshold) + call bml_multiply(Res_bml,KK0T_bml,K0Res_bml,1.0_dp,0.0_dp,threshold) + call bml_get_row(K0Res_bml,1,row_NA) + dr = row_NA + fi(:,I) = dr + + L = I + allocate(O(L,L), M(L,L)) + do K = 1,L + do J = 1,L + O(K,J) = dot_product(fi(:,K),fi(:,J)) ! O_KJ = < fv_i(K) | fv_i(J) > see below Eq. (31) + enddo + enddo + call Invert(O,M,L) ! M = O^(-1) + IdentRes = 0.D0*K0Res + KRes = 0.D0 + do K = 1,L + do J = 1,L + proj_tmp = M(K,J)*dot_product(fi(:,J),K0Res) + IdentRes = IdentRes + proj_tmp*fi(:,K) + KRes = KRes + proj_tmp*vi(:,K) !! KRes becomes the rank-L approximate of (K0*J)^(-1)*K0*(q[n]-n) + enddo + enddo + Fel = norm2(IdentRes-K0Res)/norm2(IdentRes) !! RELATIVE RESIDUAL ERROR ESTIMATE Eq. (48) Ref. [*] + write(*,*) '# I, L, Fel = ',I,L,Fel !! Fel goes down with L + deallocate(O, M) + + enddo + + deallocate(row1);deallocate(row2);deallocate(row_NA) + call bml_deallocate(KK0T_bml) + call bml_deallocate(K0Res_bml) + call bml_deallocate(Res_bml) + + end subroutine prg_kernel_multirank + +!> Compute full inverse Jacobian of q[n]-n (for LATTE) +!! \param KK The inverse Jacobian. +!! \param DO_bml Orthogonalized density matrix. +!! \param mu0 The chemical potiential. +!! \param RXYZ Nuclear coordinates. +!! \param Box Box dimensions. +!! \param Hubbard_U Hubbard U list. +!! \param Element_Pointer List to keep track of elements. +!! \param Nr_atoms The number of atoms. +!! \param HDIM Hamiltonian matrix dimension. +!! \param Max_Nr_Neigh Max neighbours for Ewald. +!! \param Coulomb_acc Coulomb accuracy +!! \param Hinxlist List to keep track of atomic positions in the Hamiltonian. +!! \param S_bml The S matrix. +!! \param Z_bml, The Z matrix. +!! \param Inv_bml, Inverses generated by prg_implicit_fermi_save_inverse. +!! \param HO_bml, Orthogonalized Hamiltonian matrix. +!! \param D1_bml, H1_bml, Y_bml, X_bml Auxillary matrices. +!! \param Nocc Occupation. +!! \param m_rec Number of recursion steps. +!! \param threshold Threshold value for matrix truncation. +!! \param beta Scaled inverse temperature. +!! \param Nr_elem Number of elements in Hubbard list. +subroutine prg_full_kernel_latte(KK,DO_bml,mu0,RXYZ,Box,Hubbard_U, & +Element_Pointer, Nr_atoms,HDIM, Max_Nr_Neigh,Coulomb_acc, & +Hinxlist,S_bml,Z_bml,Inv_bml,D1_bml,H1_bml,HO_bml,Y_bml,X_bml, & +Nocc,m_rec,threshold,beta,Nr_elem) + +use bml + +implicit none +integer, parameter :: PREC = 8, dp = kind(1.0d0) +integer, intent(in) :: Nr_atoms, Nr_elem,HDIM, Nocc, Max_Nr_Neigh,m_rec +real(PREC), parameter :: ONE = 1.D0, TWO = 2.D0, ZERO = 0.D0 +real(PREC), parameter :: kB = 8.61739d-5 ! eV/K, kB = 6.33366256e-6 Ry/K, kB = 3.166811429e-6 Ha/K +real(PREC), intent(in) :: Coulomb_acc, threshold,beta +real(PREC) :: v(Nr_atoms) +real(PREC), intent(in) :: RXYZ(3,Nr_atoms),Box(3,3) +integer, intent(in) :: Hinxlist(HDIM) +real(PREC), intent(in) :: Hubbard_U(Nr_elem) +type(bml_matrix_t), intent(in) :: Inv_bml(m_rec) +type(bml_matrix_t), intent(in) :: S_bml,Z_bml,HO_bml +type(bml_matrix_t),intent(inout) :: H1_bml,DO_bml,D1_bml,Y_bml,X_bml +real(PREC), intent(inout) :: mu0 +integer, intent(in) :: Element_Pointer(Nr_atoms) +real(PREC) :: Coulomb_Pot_Real(Nr_atoms), Coulomb_Pot(Nr_atoms) +real(PREC) :: Coulomb_Pot_Real_I, Coulomb_Pot_k(Nr_atoms) +real(PREC) :: Coulomb_Pot_Real_dq_v(Nr_atoms), Coulomb_Pot_dq_v(Nr_atoms) +real(PREC) :: dq_v(Nr_atoms) +real(PREC) :: dq_dv(Nr_atoms), err,tol +real(PREC), intent(out) :: KK(Nr_atoms,Nr_atoms) +integer :: I,J,K, ITER, mm,It,N,MN +real(PREC),allocatable :: row1(:),row2(:),JJ(:,:) +type(bml_matrix_t) :: ZT_bml, R_bml, JJI_bml, JJ_bml, tmp_bml +character(20) :: bml_type + + bml_type = bml_get_type(HO_bml) + N = bml_get_N(HO_bml) + MN = bml_get_M(HO_bml) + call bml_zero_matrix(bml_type,bml_element_real,dp,N,MN,ZT_bml) + call bml_transpose(Z_bml,ZT_bml) + allocate(row1(HDIM)); allocate(row2(HDIM)); allocate(JJ(Nr_atoms,Nr_atoms)) + Coulomb_Pot_dq_v = ZERO + Coulomb_Pot_k = ZERO + dq_v = ZERO + JJ = ZERO + KK = ZERO + + do J = 1,Nr_atoms + dq_v(J) = ONE + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(I,Coulomb_Pot_Real_I) + do I = 1,Nr_atoms + call Ewald_Real_Space_Single_latte(Coulomb_Pot_Real_I,I,RXYZ,Box,Nr_elem, & + dq_v,J,Hubbard_U,Element_Pointer,Nr_atoms,Coulomb_acc,HDIM,Max_Nr_Neigh) + Coulomb_Pot_Real(I) = Coulomb_Pot_Real_I + enddo + !$OMP END PARALLEL DO + call Ewald_k_Space_latte(Coulomb_Pot_k,RXYZ,Box,dq_v,Nr_atoms,Coulomb_acc, & + Max_Nr_Neigh) + Coulomb_Pot_dq_v = Coulomb_Pot_Real+Coulomb_Pot_k + + call bml_deallocate(H1_bml) + call bml_zero_matrix(bml_type,bml_element_real,dp,N,MN,H1_bml) + + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(I,K) + do I = 1,Nr_atoms-1 + do K = Hinxlist(I)+1,Hinxlist(I+1) + row1(K) = Hubbard_U(Element_Pointer(I))*dq_v(I) + Coulomb_Pot_dq_v(I) + enddo + enddo + !$OMP END PARALLEL DO + do K = Hinxlist(Nr_atoms)+1,HDIM + row1(K) = Hubbard_U(Element_Pointer(Nr_atoms))*dq_v(Nr_atoms) + Coulomb_Pot_dq_v(Nr_atoms) + enddo + + call bml_set_diagonal(H1_bml,row1,threshold) + call bml_multiply(S_bml,H1_bml,X_bml,1.0_dp,0.0_dp,threshold) + call bml_multiply(H1_bml,S_bml,X_bml,0.5_dp,0.5_dp,threshold) + call bml_copy(X_bml,H1_bml) + + call bml_multiply(ZT_bml,H1_bml,X_bml,1.0_dp,0.0_dp,threshold) + call bml_multiply(X_bml,Z_bml,H1_bml,1.0_dp,0.0_dp,threshold) + + call prg_implicit_fermi_first_order_response(HO_bml,H1_bml,DO_bml,D1_bml,Inv_bml, & + m_rec,mu0,beta,real(nocc,PREC),threshold) + + call bml_multiply(Z_bml,D1_bml,Y_bml,2.0_dp,0.0_dp,threshold) + call bml_multiply(Y_bml,ZT_bml,D1_bml,1.0_dp,0.0_dp,threshold) + + call bml_multiply(D1_bml,S_bml,X_bml,1.0_dp,0.0_dp,threshold) + call bml_get_diagonal(X_bml,row1) + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(It,K) + do It = 1, Nr_atoms-1 + dq_dv(It) = 0 + do K = Hinxlist(It)+1,Hinxlist(It+1) + dq_dv(It) = dq_dv(It) + row1(K) + enddo + JJ(It,J) = dq_dv(It) + enddo + !$OMP END PARALLEL DO + dq_dv(Nr_atoms) = 0 + do K = Hinxlist(Nr_atoms)+1,HDIM + dq_dv(Nr_atoms) = dq_dv(Nr_atoms) + row1(K) + enddo + JJ(Nr_atoms,J) = dq_dv(Nr_atoms) + dq_v = ZERO + enddo + + do I = 1,Nr_atoms + JJ(I,I) = JJ(I,I) - ONE + enddo + + call Invert(JJ,KK,Nr_atoms) + + deallocate(row1); deallocate(row2); deallocate(JJ) + call bml_deallocate(ZT_bml) + +end subroutine prg_full_kernel_latte + +!> Compute full inverse Jacobian of q[n]-n (for development code) +!! \param KK The inverse Jacobian. +!! \param DO_bml Orthogonalized density matrix. +!! \param mu0 The chemical potiential. +!! \param RX,RY,RZ Nuclear coordinates. +!! \param Lbox Box dimensions. +!! \param Hubbard_U Hubbard U list. +!! \param Element_Type List to keep track of elements. +!! \param Nr_atoms The number of atoms. +!! \param HDIM Hamiltonian matrix dimension. +!! \param Max_Nr_Neigh Max neighbours for Ewald. +!! \param Coulomb_acc Coulomb accuracy +!! \param TIMERATIO Parameter for Ewald +!! \param nnRx,nnRy,nnRz Neighbour lists. +!! \param nrnnlist Number of neighbours list. +!! \param nnType Refers to original order of atoms. +!! \param H_INDEX_START, H_INDEX_END Lists to keep track of atomic positions in the Hamiltonian. +!! \param S_bml The S matrix. +!! \param Z_bml, The Z matrix. +!! \param Inv_bml, Inverses generated by prg_implicit_fermi_save_inverse. +!! \param HO_bml, Orthogonalized Hamiltonian matrix. +!! \param D1_bml, H1_bml, Y_bml, X_bml Auxillary matrices. +!! \param Nocc Occupation. +!! \param Znuc List of nuclear charges. +!! \param m_rec Number of recursion steps. +!! \param threshold Threshold value for matrix truncation. +!! \param beta Scaled inverse temperature. +!! \param diagonal Auxillary vector. + +subroutine prg_full_kernel(KK,DO_bml,mu0,RX,RY,RZ,LBox,Hubbard_U,Element_Type, & +Nr_atoms,HDIM, Max_Nr_Neigh,Coulomb_acc,TIMERATIO,nnRx,nnRy,nnRz, & +nrnnlist,nnType,H_INDEX_START,H_INDEX_END,S_bml,Z_bml,Inv_bml,D1_bml,H1_bml,HO_bml,Y_bml,X_bml, & +Nocc,Znuc,m_rec,threshold,beta,diagonal) + +use bml + +implicit none +integer, parameter :: PREC = 8, dp = kind(1.0d0) +integer, intent(in) :: Nr_atoms, HDIM, Nocc, Max_Nr_Neigh,m_rec +real(PREC), parameter :: ONE = 1.D0, TWO = 2.D0, ZERO = 0.D0 +real(PREC), parameter :: kB = 8.61739d-5 ! eV/K, kB = 6.33366256e-6 Ry/K, kB = 3.166811429e-6 Ha/K +real(PREC), intent(in) :: Coulomb_acc, TIMERATIO,threshold,beta +real(PREC) :: v(Nr_atoms) +real(PREC), intent(in) :: RX(Nr_atoms), RY(Nr_atoms), RZ(Nr_atoms), LBox(3) +integer, intent(in) :: H_INDEX_START(Nr_atoms), H_INDEX_END(Nr_atoms) +real(PREC), intent(in) :: Znuc(Nr_atoms), Hubbard_U(Nr_atoms) +type(bml_matrix_t), intent(in) :: Inv_bml(m_rec) +type(bml_matrix_t), intent(in) :: S_bml,Z_bml,HO_bml +type(bml_matrix_t),intent(inout) :: H1_bml,DO_bml,D1_bml,Y_bml,X_bml +real(PREC), intent(inout) :: mu0, diagonal(HDIM) +character(10), intent(in) :: Element_Type(Nr_atoms) +integer, intent(in) :: nrnnlist(Nr_atoms), nnType(Nr_atoms,Max_Nr_Neigh) +real(PREC), intent(in) :: nnRx(Nr_atoms,Max_Nr_Neigh) +real(PREC), intent(in) :: nnRy(Nr_atoms,Max_Nr_Neigh), nnRz(Nr_atoms,Max_Nr_Neigh) +real(PREC) :: Coulomb_Pot_Real(Nr_atoms), Coulomb_Pot(Nr_atoms) +real(PREC) :: Coulomb_Pot_Real_I, Coulomb_Pot_k(Nr_atoms) +real(PREC) :: Coulomb_Pot_Real_dq_v(Nr_atoms), Coulomb_Pot_dq_v(Nr_atoms) +real(PREC) :: Coulomb_Force_Real_I(3),Coulomb_Force_k(3,Nr_atoms) +real(PREC) :: dq_v(Nr_atoms) +real(PREC) :: dq_dv(Nr_atoms) +real(PREC), intent(out) :: KK(Nr_atoms,Nr_atoms) +integer :: I,J,K, ITER, mm,It +real(PREC),allocatable :: row1(:),row2(:),JJ(:,:) + + allocate(row1(HDIM)); allocate(row2(HDIM)); allocate(JJ(Nr_atoms,Nr_atoms)) + Coulomb_Pot_dq_v = ZERO + Coulomb_Pot_k = ZERO + dq_v = ZERO + JJ = ZERO + KK = ZERO + + do J = 1,Nr_atoms + dq_v(J) = ONE + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(I,Coulomb_Pot_Real_I) + do I = 1,Nr_atoms + call Ewald_Real_Space_Single(Coulomb_Pot_Real_I,Coulomb_Force_Real_I,I,RX,RY,RZ,LBox, & + dq_v,J,Hubbard_U,Element_Type,Nr_atoms,Coulomb_acc,TIMERATIO,HDIM,Max_Nr_Neigh) + Coulomb_Pot_Real(I) = Coulomb_Pot_Real_I + enddo + !$OMP END PARALLEL DO + call Ewald_k_Space(Coulomb_Pot_k,Coulomb_Force_k,RX,RY,RZ,LBox,dq_v,Nr_atoms,Coulomb_acc, & + TIMERATIO,Max_Nr_Neigh) + Coulomb_Pot_dq_v = Coulomb_Pot_Real+Coulomb_Pot_k + + diagonal = 0.0_dp + do I = 1,HDIM + call bml_set_row(H1_bml,I,diagonal,threshold) + enddo + + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(I,K) + do I = 1,Nr_atoms + do K = H_INDEX_START(I),H_INDEX_END(I) + diagonal(K) = Hubbard_U(I)*dq_v(I) + Coulomb_Pot_dq_v(I) + enddo + enddo + !$OMP END PARALLEL DO + + call bml_set_diagonal(H1_bml,diagonal,threshold) + call bml_multiply(S_bml,H1_bml,D1_bml,1.0_dp,0.0_dp,threshold) + call bml_multiply(H1_bml,S_bml,D1_bml,0.5_dp,0.5_dp,threshold) + call bml_copy(D1_bml,H1_bml) + + call bml_transpose(Z_bml,X_bml) + call bml_multiply(X_bml,H1_bml,D1_bml,1.0_dp,0.0_dp,threshold) + call bml_multiply(D1_bml,Z_bml,H1_bml,1.0_dp,0.0_dp,threshold) + + call prg_implicit_fermi_first_order_response(HO_bml,H1_bml,DO_bml,D1_bml,Inv_bml, & + m_rec,mu0,beta,real(nocc,PREC),threshold) + + call bml_transpose(Z_bml,X_bml) + call bml_multiply(Z_bml,D1_bml,Y_bml,2.0_dp,0.0_dp,threshold) + call bml_multiply(Y_bml,X_bml,D1_bml,1.0_dp,0.0_dp,threshold) + + do It = 1, Nr_atoms + dq_dv(It) = 0 + do K = H_INDEX_START(It), H_INDEX_END(It) + call bml_get_row(S_bml,K,row1) + call bml_get_row(D1_bml,K,row2) + dq_dv(It) = dq_dv(It) + dot_product(row1,row2) + enddo + JJ(It,J) = dq_dv(It) + enddo + dq_v = ZERO + enddo + + do I = 1,Nr_atoms + JJ(I,I) = JJ(I,I) - ONE + enddo + call Invert(JJ,KK,Nr_atoms) + deallocate(row1); deallocate(row2); deallocate(JJ) + +end subroutine prg_full_kernel + + +! Compute the low-rank kernel matrix. (For development code) + subroutine prg_kernel_matrix_multirank(KRes,KK0_bml,Res,FelTol,L,LMAX,HO_bml,mu,beta,RX,RY,RZ,LBox,Hubbard_U,Element_Type, & + Nr_atoms,HDIM,Max_Nr_Neigh,Coulomb_acc,TIMERATIO,nnRx,nnRy,nnRz,nrnnlist,nnType,H_INDEX_START,H_INDEX_END, & + S_bml,Z_bml,Nocc,Znuc,Inv_bml,H1_bml,X_bml,Y_bml,DO_bml,D1_bml,m_rec,threshold) + +!! Res = q[n] - n +!! KK0 is preconditioner +!! KRes rank-L approximation of (K0*J)^(-1)*K0*(q[n]-n) with (K0*J)^(-1) as in Eq. (41) in Ref. [*] + + implicit none + integer, intent(in) :: Nr_atoms, HDIM, Nocc,Max_Nr_Neigh,m_rec + real(dp), parameter :: ONE = 1.D0, TWO = 2.D0, ZERO = 0.D0 + real(dp), intent(in) :: Coulomb_acc, TIMERATIO, FelTol, beta, mu + real(dp), intent(in) :: RX(Nr_atoms), RY(Nr_atoms), RZ(Nr_atoms), LBox(3) + real(dp), intent(in) :: Res(Nr_atoms) + integer, intent(in) :: H_INDEX_START(Nr_atoms), H_INDEX_END(Nr_atoms) + real(dp), intent(in) :: Znuc(Nr_atoms), Hubbard_U(Nr_atoms) + type(bml_matrix_t), intent(in) :: HO_bml, S_bml, Z_bml, Inv_bml(m_rec) + real(dp) :: K0Res(Nr_atoms) + type(bml_matrix_t),intent(inout) :: H1_bml, X_bml,Y_bml,DO_bml,D1_bml + type(bml_matrix_t),intent(inout) :: KK0_bml + real(dp), intent(in) :: threshold + integer, intent(in) :: LMAX + character(10), intent(in) :: Element_Type(Nr_atoms) + integer, intent(in) :: nrnnlist(Nr_atoms), nnType(Nr_atoms,Max_Nr_Neigh) + real(dp), intent(in) :: nnRx(Nr_atoms,Max_Nr_Neigh) + real(dp), intent(in) :: nnRy(Nr_atoms,Max_Nr_Neigh), nnRz(Nr_atoms,Max_Nr_Neigh) + real(dp) :: Coulomb_Pot_Real(Nr_atoms), Coulomb_Pot(Nr_atoms) + real(dp) :: Coulomb_Pot_Real_I, Coulomb_Pot_k(Nr_atoms),dq_dv(Nr_atoms) + real(dp) :: Coulomb_Pot_Real_dq_v(Nr_atoms), Coulomb_Pot_dq_v(Nr_atoms) + real(dp) :: Coulomb_Force_Real_I(3),Coulomb_Force_k(3,Nr_atoms) + real(dp) :: dq_v(Nr_atoms) + real(dp), intent(out) :: KRes(Nr_atoms) + integer :: I,J,K,It,col,row + integer, intent(out) :: L + real(dp) :: Fel, proj_tmp,elem + real(dp) :: vi(Nr_atoms,LMAX), fi(Nr_atoms,LMAX), v(Nr_atoms) + real(dp) :: dr(Nr_Atoms), IdentRes(Nr_atoms) + real(dp), allocatable :: O(:,:),M(:,:),row1(:),row2(:) + + allocate(row1(HDIM));allocate(row2(HDIM)); + + dr = Res + I = 0 + Fel = 1.D0 + do while ((Fel > FelTol).AND.(I < (LMAX))) !! Fel = "Error" in Swedish + I = I + 1 + vi(:,I) = dr/norm2(dr) + do J = 1,I-1 + vi(:,I) = vi(:,I) - dot_product(vi(:,I),vi(:,J))*vi(:,J) !! Orthogonalized v_i as in Eq. (42) Ref. [*] + enddo + vi(:,I) = vi(:,I)/norm2(vi(:,I)) + v(:) = vi(:,I) ! v_i + + dq_v = v + + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(I,Coulomb_Pot_Real_I,Coulomb_Force_Real_I) + do J = 1,Nr_atoms + call Ewald_Real_Space(Coulomb_Pot_Real_I,Coulomb_Force_Real_I,J,RX,RY,RZ,LBox, & + dq_v,Hubbard_U,Element_Type,Nr_atoms,Coulomb_acc,TIMERATIO,nnRx,nnRy,nnRz,nrnnlist,nnType,HDIM,Max_Nr_Neigh) + Coulomb_Pot_Real(J) = Coulomb_Pot_Real_I + enddo + !$OMP END PARALLEL DO + + call Ewald_k_Space(Coulomb_Pot_k,Coulomb_Force_k,RX,RY,RZ,LBox,dq_v,Nr_atoms,Coulomb_acc,TIMERATIO,Max_Nr_Neigh) + Coulomb_Pot_dq_v = Coulomb_Pot_Real+Coulomb_Pot_k + + row1 = 0.0_dp + do J = 1,HDIM + call bml_set_row(H1_bml,J,row1,threshold) + enddo + + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(J,K) + do J = 1,Nr_atoms + do K = H_INDEX_START(J),H_INDEX_END(J) + row1(K) = Hubbard_U(J)*dq_v(J) + Coulomb_Pot_dq_v(J) + enddo + enddo + !$OMP END PARALLEL DO + + call bml_set_diagonal(H1_bml,row1,threshold) + call bml_multiply(S_bml,H1_bml,D1_bml,1.0_dp,0.0_dp,threshold) + call bml_multiply(H1_bml,S_bml,D1_bml,0.5_dp,0.5_dp,threshold) + call bml_copy(D1_bml,H1_bml) + + call bml_transpose(Z_bml,X_bml) + call bml_multiply(X_bml,H1_bml,D1_bml,1.0_dp,0.0_dp,threshold) + call bml_multiply(D1_bml,Z_bml,H1_bml,1.0_dp,0.0_dp,threshold) + + call prg_implicit_fermi_first_order_response(HO_bml,H1_bml,DO_bml,D1_bml,Inv_bml, & + m_rec, mu, beta, real(nocc,dp), threshold) + + call bml_transpose(Z_bml,X_bml) + call bml_multiply(Z_bml,D1_bml,Y_bml,2.0_dp,0.0_dp,threshold) + call bml_multiply(Y_bml,X_bml,D1_bml,1.0_dp,0.0_dp,threshold) + + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(It,K,row1,row2) + do It = 1, Nr_atoms + dq_dv(It) = 0 + do K = H_INDEX_START(It), H_INDEX_END(It) + call bml_get_row(S_bml,K,row1) + call bml_get_row(D1_bml,K,row2) + dq_dv(It) = dq_dv(It) + dot_product(row1,row2) + enddo + enddo + !$OMP END PARALLEL DO + + dr = dq_dv - v + fi(:,I) = dr + + L = I + allocate(O(L,L), M(L,L)) + do K = 1,L + do J = 1,L + O(K,J) = dot_product(fi(:,K),fi(:,J)) ! O_KJ = < fv_i(K) | fv_i(J) > see below Eq. (31) + enddo + enddo + call Invert(O,M,L) ! M = O^(-1) + IdentRes = 0.D0*Res + KRes = 0.D0 + do K = 1,L + do J = 1,L + proj_tmp = M(K,J)*dot_product(fi(:,J),Res) + IdentRes = IdentRes + proj_tmp*fi(:,K) + KRes = KRes + proj_tmp*vi(:,K) !! KRes becomes the rank-L approximate of (J)^(-1)*(q[n]-n) + enddo + enddo + Fel = norm2(IdentRes-Res)/norm2(IdentRes) !! RELATIVE RESIDUAL ERROR ESTIMATE Eq. (48) Ref. [*] + write(*,*) '# I, L, Fel = ',I,L,Fel !! Fel goes down with L + + ! Does not work, need to normalize matrix + if ((Fel > FelTol).AND.(I < (LMAX))) then + deallocate(O, M) + else + do row = 1,Nr_atoms + do col = 1,Nr_atoms + elem = 0.0 + do K = 1,L + do J = 1,L + elem = elem + M(J,K)*vi(row,J)*fi(col,K) + enddo + enddo + if (abs(elem) > threshold) then + call bml_set_element(KK0_bml,row,col,elem) + endif + enddo + enddo + deallocate(O,M) + endif + enddo + deallocate(row1); deallocate(row2); + + end subroutine prg_kernel_matrix_multirank + +end module prg_xlbokernel_mod From 7960bcfbbd5f25863131277e80ddc4b92abe3d02 Mon Sep 17 00:00:00 2001 From: Linnea Andersson Date: Wed, 12 May 2021 19:34:44 +0200 Subject: [PATCH 02/10] O(N) pre-conditioned kernel matrix with implicit FD-expansion response --- src/CMakeLists.txt | 1 + src/prg_ewald_mod.F90 | 1386 ++++++++++++++--------------- src/prg_implicit_fermi_mod.F90 | 210 ++--- src/prg_timer_mod.F90 | 186 ++-- src/prg_xlbokernel_mod.F90 | 1505 ++++++++++++++++---------------- tests/CMakeLists.txt | 1 + tests/src/main.F90 | 1474 ++++++++++++++++--------------- 7 files changed, 2403 insertions(+), 2360 deletions(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index beaa317a..becd6040 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -125,6 +125,7 @@ install(FILES ${CMAKE_CURRENT_BINARY_DIR}/prg_timer_mod.mod ${CMAKE_CURRENT_BINARY_DIR}/prg_xlbo_mod.mod ${CMAKE_CURRENT_BINARY_DIR}/prg_xlkernel_mod.mod + ${CMAKE_CURRENT_BINARY_DIR}/prg_xlbokernel_mod.mod ${CMAKE_CURRENT_BINARY_DIR}/slaterkosterforce_latte_mod.mod ${CMAKE_CURRENT_BINARY_DIR}/tbparams_latte_mod.mod DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) diff --git a/src/prg_ewald_mod.F90 b/src/prg_ewald_mod.F90 index 5a032412..95127797 100644 --- a/src/prg_ewald_mod.F90 +++ b/src/prg_ewald_mod.F90 @@ -1,4 +1,4 @@ -! Ewald sum routines for kernel calculation +! Ewald sum routines for kernel calculation module prg_ewald_mod use bml @@ -20,550 +20,550 @@ module prg_ewald_mod contains -!> Find Coulomb potential on site I from single charge at site J -subroutine Ewald_Real_Space_Single_latte(COULOMBV,I,RXYZ,Box,Nr_elem, & - DELTAQ,J,U,Element_Pointer,Nr_atoms,COULACC,HDIM,Max_Nr_Neigh) - -implicit none - -integer, parameter :: PREC = 8 -real(PREC), parameter :: ONE = 1.D0, TWO = 2.D0, ZERO = 0.D0, SIX = 6.D0, THREE = 3.D0 -real(PREC), parameter :: FOURTYEIGHT = 48.D0, ELEVEN = 11.D0, SIXTEEN = 16.D0 -real(PREC), parameter :: pi = 3.14159265358979323846264D0 -real(PREC), parameter :: SQRTPI = 1.772453850905516D0 -integer, intent(in) :: Nr_atoms, Nr_elem, HDIM, Max_Nr_Neigh, I, J, Element_Pointer(Nr_atoms) -real(PREC), intent(in) :: COULACC, DELTAQ(Nr_atoms) -real(PREC) :: TFACT, RELPERM, KECONST -real(PREC), intent(in) :: RXYZ(3,Nr_atoms), Box(3,3) -real(PREC), intent(in) :: U(Nr_elem) -real(PREC) :: COULCUT, COULCUT2 -real(PREC), intent(out) :: COULOMBV -real(PREC) :: Ra(3), Rb(3), dR, Rab(3) -real(PREC) :: COULVOL, SQRTX, CALPHA, DC(3), MAGR, MAGR2, Z -real(PREC) :: CALPHA2, TI,TI2,TI3,TI4,TI6,SSA,SSB,SSC,SSD,SSE -real(PREC) :: NUMREP_ERFC, CA, FORCE, EXPTI, EXPTJ -real(PREC) :: TJ,TJ2,TJ3,TJ4,TJ6,TI2MTJ2A,SA,SB,SC,SD,SE,SF -real(PREC) :: TI2MTJ2, TI2MTI2, TJ2MTI2 - -integer :: K, ccnt,l,m,n - -COULVOL = Box(1,1)*Box(2,2)*Box(3,3) -SQRTX = sqrt(-log(COULACC)) - -ccnt = 0 -COULCUT = 12.D0 -CALPHA = SQRTX/COULCUT -COULCUT2 = COULCUT*COULCUT -CALPHA2 = CALPHA*CALPHA - -RELPERM = ONE -KECONST = 14.3996437701414D0*RELPERM -TFACT = 16.0D0/(5.0D0*KECONST) - -COULOMBV = ZERO - -TI = TFACT*U(Element_Pointer(I)) -TI2 = TI*TI -TI3 = TI2*TI -TI4 = TI2*TI2 -TI6 = TI4*TI2 - -SSA = TI -SSB = TI3/48.D0 -SSC = 3.D0*TI2/16.D0 -SSD = 11.D0*TI/16.D0 -SSE = 1.D0 - -Ra(1) = RXYZ(1,I) -Ra(2) = RXYZ(2,I) -Ra(3) = RXYZ(3,I) + !> Find Coulomb potential on site I from single charge at site J + subroutine Ewald_Real_Space_Single_latte(COULOMBV,I,RXYZ,Box,Nr_elem, & + DELTAQ,J,U,Element_Pointer,Nr_atoms,COULACC,HDIM,Max_Nr_Neigh) + + implicit none + + integer, parameter :: PREC = 8 + real(PREC), parameter :: ONE = 1.D0, TWO = 2.D0, ZERO = 0.D0, SIX = 6.D0, THREE = 3.D0 + real(PREC), parameter :: FOURTYEIGHT = 48.D0, ELEVEN = 11.D0, SIXTEEN = 16.D0 + real(PREC), parameter :: pi = 3.14159265358979323846264D0 + real(PREC), parameter :: SQRTPI = 1.772453850905516D0 + integer, intent(in) :: Nr_atoms, Nr_elem, HDIM, Max_Nr_Neigh, I, J, Element_Pointer(Nr_atoms) + real(PREC), intent(in) :: COULACC, DELTAQ(Nr_atoms) + real(PREC) :: TFACT, RELPERM, KECONST + real(PREC), intent(in) :: RXYZ(3,Nr_atoms), Box(3,3) + real(PREC), intent(in) :: U(Nr_elem) + real(PREC) :: COULCUT, COULCUT2 + real(PREC), intent(out) :: COULOMBV + real(PREC) :: Ra(3), Rb(3), dR, Rab(3) + real(PREC) :: COULVOL, SQRTX, CALPHA, DC(3), MAGR, MAGR2, Z + real(PREC) :: CALPHA2, TI,TI2,TI3,TI4,TI6,SSA,SSB,SSC,SSD,SSE + real(PREC) :: NUMREP_ERFC, CA, FORCE, EXPTI, EXPTJ + real(PREC) :: TJ,TJ2,TJ3,TJ4,TJ6,TI2MTJ2A,SA,SB,SC,SD,SE,SF + real(PREC) :: TI2MTJ2, TI2MTI2, TJ2MTI2 + + integer :: K, ccnt,l,m,n + + COULVOL = Box(1,1)*Box(2,2)*Box(3,3) + SQRTX = sqrt(-log(COULACC)) + + ccnt = 0 + COULCUT = 12.D0 + CALPHA = SQRTX/COULCUT + COULCUT2 = COULCUT*COULCUT + CALPHA2 = CALPHA*CALPHA + + RELPERM = ONE + KECONST = 14.3996437701414D0*RELPERM + TFACT = 16.0D0/(5.0D0*KECONST) + + COULOMBV = ZERO + + TI = TFACT*U(Element_Pointer(I)) + TI2 = TI*TI + TI3 = TI2*TI + TI4 = TI2*TI2 + TI6 = TI4*TI2 + + SSA = TI + SSB = TI3/48.D0 + SSC = 3.D0*TI2/16.D0 + SSD = 11.D0*TI/16.D0 + SSE = 1.D0 + + Ra(1) = RXYZ(1,I) + Ra(2) = RXYZ(2,I) + Ra(3) = RXYZ(3,I) do k = -1,1 - do m = -1,1 - do l = -1,1 - - Rb(1) = RXYZ(1,J)+k*box(1,1) - Rb(2) = RXYZ(2,J)+m*box(2,2) - Rb(3) = RXYZ(3,J)+l*box(3,3) - Rab = Rb-Ra ! OBS b - a !!! - dR = norm2(Rab) - MAGR = dR - MAGR2 = dR*dR - - if ((dR <= COULCUT).and.(dR > 1e-12)) then - - TJ = TFACT*U(Element_Pointer(J)) - DC = Rab/dR - - Z = abs(CALPHA*MAGR) - NUMREP_ERFC = erfc(Z) - - CA = NUMREP_ERFC/MAGR - COULOMBV = COULOMBV + DELTAQ(J)*CA - ccnt = ccnt + 1 - CA = CA + TWO*CALPHA*exp( -CALPHA2*MAGR2 )/SQRTPI - EXPTI = exp(-TI*MAGR ) - - if (Element_Pointer(I).eq.Element_Pointer(J)) then - COULOMBV = COULOMBV - DELTAQ(J)*EXPTI*(SSB*MAGR2 + SSC*MAGR + SSD + SSE/MAGR) - ccnt = ccnt + 1 - else - TJ2 = TJ*TJ - TJ3 = TJ2*TJ - TJ4 = TJ2*TJ2 - TJ6 = TJ4*TJ2 - EXPTJ = exp( -TJ*MAGR ) - TI2MTJ2 = TI2 - TJ2 - TJ2MTI2 = -TI2MTJ2 - SA = TI - SB = TJ4*TI/(TWO*TI2MTJ2*TI2MTJ2) - SC = (TJ6 - THREE*TJ4*TI2)/(TI2MTJ2*TI2MTJ2*TI2MTJ2) - SD = TJ - SE = TI4*TJ/(TWO * TJ2MTI2 * TJ2MTI2) - SF = (TI6 - THREE*TI4*TJ2)/(TJ2MTI2*TJ2MTI2*TJ2MTI2) - - COULOMBV = COULOMBV - (DELTAQ(J)*(EXPTI*(SB - (SC/MAGR)) + EXPTJ*(SE - (SF/MAGR)))) - endif - endif - enddo - enddo + do m = -1,1 + do l = -1,1 + + Rb(1) = RXYZ(1,J)+k*box(1,1) + Rb(2) = RXYZ(2,J)+m*box(2,2) + Rb(3) = RXYZ(3,J)+l*box(3,3) + Rab = Rb-Ra ! OBS b - a !!! + dR = norm2(Rab) + MAGR = dR + MAGR2 = dR*dR + + if ((dR <= COULCUT).and.(dR > 1e-12)) then + + TJ = TFACT*U(Element_Pointer(J)) + DC = Rab/dR + + Z = abs(CALPHA*MAGR) + NUMREP_ERFC = erfc(Z) + + CA = NUMREP_ERFC/MAGR + COULOMBV = COULOMBV + DELTAQ(J)*CA + ccnt = ccnt + 1 + CA = CA + TWO*CALPHA*exp( -CALPHA2*MAGR2 )/SQRTPI + EXPTI = exp(-TI*MAGR ) + + if (Element_Pointer(I).eq.Element_Pointer(J)) then + COULOMBV = COULOMBV - DELTAQ(J)*EXPTI*(SSB*MAGR2 + SSC*MAGR + SSD + SSE/MAGR) + ccnt = ccnt + 1 + else + TJ2 = TJ*TJ + TJ3 = TJ2*TJ + TJ4 = TJ2*TJ2 + TJ6 = TJ4*TJ2 + EXPTJ = exp( -TJ*MAGR ) + TI2MTJ2 = TI2 - TJ2 + TJ2MTI2 = -TI2MTJ2 + SA = TI + SB = TJ4*TI/(TWO*TI2MTJ2*TI2MTJ2) + SC = (TJ6 - THREE*TJ4*TI2)/(TI2MTJ2*TI2MTJ2*TI2MTJ2) + SD = TJ + SE = TI4*TJ/(TWO * TJ2MTI2 * TJ2MTI2) + SF = (TI6 - THREE*TI4*TJ2)/(TJ2MTI2*TJ2MTI2*TJ2MTI2) + + COULOMBV = COULOMBV - (DELTAQ(J)*(EXPTI*(SB - (SC/MAGR)) + EXPTJ*(SE - (SF/MAGR)))) + endif + endif + enddo + enddo enddo -COULOMBV = KECONST*COULOMBV - -end subroutine Ewald_Real_Space_Single_latte - -subroutine Ewald_Real_Space_Single(COULOMBV,FCOUL,I,RX,RY,RZ,LBox, & - DELTAQ,J,U,Element_Type,Nr_atoms,COULACC,TIMERATIO,HDIM,Max_Nr_Neigh) - - -implicit none - -integer, parameter :: PREC = 8 -real(PREC), parameter :: ONE = 1.D0, TWO = 2.D0, ZERO = 0.D0, SIX = 6.D0, THREE = 3.D0 -real(PREC), parameter :: FOURTYEIGHT = 48.D0, ELEVEN = 11.D0, SIXTEEN = 16.D0 -real(PREC), parameter :: pi = 3.14159265358979323846264D0 -real(PREC), parameter :: SQRTPI = 1.772453850905516D0 -integer, intent(in) :: Nr_atoms, HDIM, Max_Nr_Neigh, I, J -real(PREC), intent(in) :: COULACC, TIMERATIO,DELTAQ(Nr_atoms) -real(PREC) :: TFACT, RELPERM, KECONST -real(PREC), intent(in) :: RX(Nr_atoms), RY(Nr_atoms), RZ(Nr_atoms), LBox(3) -real(PREC), intent(in) :: U(Nr_atoms) -real(PREC) :: COULCUT, COULCUT2 -character(10), intent(in) :: Element_Type(Nr_atoms) -real(PREC), intent(out) :: COULOMBV, FCOUL(3) -real(PREC) :: Ra(3), Rb(3), dR, Rab(3) -real(PREC) :: COULVOL, SQRTX, CALPHA, DC(3), MAGR, MAGR2, Z -real(PREC) :: CALPHA2, TI,TI2,TI3,TI4,TI6,SSA,SSB,SSC,SSD,SSE -real(PREC) :: NUMREP_ERFC, CA, FORCE, EXPTI, EXPTJ -real(PREC) :: TJ,TJ2,TJ3,TJ4,TJ6,TI2MTJ2A,SA,SB,SC,SD,SE,SF -real(PREC) :: TI2MTJ2, TI2MTI2, TJ2MTI2 - -integer :: K, ccnt,l,m,n - -COULVOL = LBox(1)*LBox(2)*LBox(3) -SQRTX = sqrt(-log(COULACC)) - -ccnt = 0 -COULCUT = 12.D0 -CALPHA = SQRTX/COULCUT -COULCUT2 = COULCUT*COULCUT -CALPHA2 = CALPHA*CALPHA - -RELPERM = ONE -KECONST = 14.3996437701414D0*RELPERM -TFACT = 16.0D0/(5.0D0*KECONST) - -FCOUL = ZERO -COULOMBV = ZERO - -TI = TFACT*U(I) -TI2 = TI*TI -TI3 = TI2*TI -TI4 = TI2*TI2 -TI6 = TI4*TI2 - -SSA = TI -SSB = TI3/48.D0 -SSC = 3.D0*TI2/16.D0 -SSD = 11.D0*TI/16.D0 -SSE = 1.D0 - -Ra(1) = RX(I) -Ra(2) = RY(I) -Ra(3) = RZ(I) + COULOMBV = KECONST*COULOMBV + + end subroutine Ewald_Real_Space_Single_latte + + subroutine Ewald_Real_Space_Single(COULOMBV,FCOUL,I,RX,RY,RZ,LBox, & + DELTAQ,J,U,Element_Type,Nr_atoms,COULACC,TIMERATIO,HDIM,Max_Nr_Neigh) + + + implicit none + + integer, parameter :: PREC = 8 + real(PREC), parameter :: ONE = 1.D0, TWO = 2.D0, ZERO = 0.D0, SIX = 6.D0, THREE = 3.D0 + real(PREC), parameter :: FOURTYEIGHT = 48.D0, ELEVEN = 11.D0, SIXTEEN = 16.D0 + real(PREC), parameter :: pi = 3.14159265358979323846264D0 + real(PREC), parameter :: SQRTPI = 1.772453850905516D0 + integer, intent(in) :: Nr_atoms, HDIM, Max_Nr_Neigh, I, J + real(PREC), intent(in) :: COULACC, TIMERATIO,DELTAQ(Nr_atoms) + real(PREC) :: TFACT, RELPERM, KECONST + real(PREC), intent(in) :: RX(Nr_atoms), RY(Nr_atoms), RZ(Nr_atoms), LBox(3) + real(PREC), intent(in) :: U(Nr_atoms) + real(PREC) :: COULCUT, COULCUT2 + character(10), intent(in) :: Element_Type(Nr_atoms) + real(PREC), intent(out) :: COULOMBV, FCOUL(3) + real(PREC) :: Ra(3), Rb(3), dR, Rab(3) + real(PREC) :: COULVOL, SQRTX, CALPHA, DC(3), MAGR, MAGR2, Z + real(PREC) :: CALPHA2, TI,TI2,TI3,TI4,TI6,SSA,SSB,SSC,SSD,SSE + real(PREC) :: NUMREP_ERFC, CA, FORCE, EXPTI, EXPTJ + real(PREC) :: TJ,TJ2,TJ3,TJ4,TJ6,TI2MTJ2A,SA,SB,SC,SD,SE,SF + real(PREC) :: TI2MTJ2, TI2MTI2, TJ2MTI2 + + integer :: K, ccnt,l,m,n + + COULVOL = LBox(1)*LBox(2)*LBox(3) + SQRTX = sqrt(-log(COULACC)) + + ccnt = 0 + COULCUT = 12.D0 + CALPHA = SQRTX/COULCUT + COULCUT2 = COULCUT*COULCUT + CALPHA2 = CALPHA*CALPHA + + RELPERM = ONE + KECONST = 14.3996437701414D0*RELPERM + TFACT = 16.0D0/(5.0D0*KECONST) + + FCOUL = ZERO + COULOMBV = ZERO + + TI = TFACT*U(I) + TI2 = TI*TI + TI3 = TI2*TI + TI4 = TI2*TI2 + TI6 = TI4*TI2 + + SSA = TI + SSB = TI3/48.D0 + SSC = 3.D0*TI2/16.D0 + SSD = 11.D0*TI/16.D0 + SSE = 1.D0 + + Ra(1) = RX(I) + Ra(2) = RY(I) + Ra(3) = RZ(I) do k = -1,1 - do m = -1,1 - do l = -1,1 - - Rb(1) = RX(J)+k*Lbox(1) - Rb(2) = RY(J)+m*Lbox(2) - Rb(3) = RZ(J)+l*Lbox(3) - Rab = Rb-Ra ! OBS b - a !!! - dR = norm2(Rab) - MAGR = dR - MAGR2 = dR*dR - - if ((dR <= COULCUT).and.(dR > 1e-12)) then - - TJ = TFACT*U(J) - DC = Rab/dR - - Z = abs(CALPHA*MAGR) - NUMREP_ERFC = erfc(Z) - - CA = NUMREP_ERFC/MAGR - COULOMBV = COULOMBV + DELTAQ(J)*CA - ccnt = ccnt + 1 - CA = CA + TWO*CALPHA*exp( -CALPHA2*MAGR2 )/SQRTPI - FORCE = -KECONST*DELTAQ(I)*DELTAQ(J)*CA/MAGR - EXPTI = exp(-TI*MAGR ) - - if (Element_Type(I).eq.Element_Type(J)) then - COULOMBV = COULOMBV - DELTAQ(J)*EXPTI*(SSB*MAGR2 + SSC*MAGR + SSD + SSE/MAGR) - ccnt = ccnt + 1 - FORCE = FORCE + (KECONST*DELTAQ(I)*DELTAQ(J)*EXPTI)*((SSE/MAGR2 - TWO*SSB*MAGR - SSC) & - + SSA*(SSB*MAGR2 + SSC*MAGR + SSD + SSE/MAGR)) - else - TJ2 = TJ*TJ - TJ3 = TJ2*TJ - TJ4 = TJ2*TJ2 - TJ6 = TJ4*TJ2 - EXPTJ = exp( -TJ*MAGR ) - TI2MTJ2 = TI2 - TJ2 - TJ2MTI2 = -TI2MTJ2 - SA = TI - SB = TJ4*TI/(TWO*TI2MTJ2*TI2MTJ2) - SC = (TJ6 - THREE*TJ4*TI2)/(TI2MTJ2*TI2MTJ2*TI2MTJ2) - SD = TJ - SE = TI4*TJ/(TWO * TJ2MTI2 * TJ2MTI2) - SF = (TI6 - THREE*TI4*TJ2)/(TJ2MTI2*TJ2MTI2*TJ2MTI2) - - COULOMBV = COULOMBV - (DELTAQ(J)*(EXPTI*(SB - (SC/MAGR)) + EXPTJ*(SE - (SF/MAGR)))) - FORCE = FORCE + KECONST*DELTAQ(I)*DELTAQ(J)*((EXPTI*(SA*(SB - (SC/MAGR)) - (SC/MAGR2))) & - + (EXPTJ*(SD*(SE - (SF/MAGR)) - (SF/MAGR2)))) - endif - - FCOUL(1) = FCOUL(1) + DC(1)*FORCE - FCOUL(2) = FCOUL(2) + DC(2)*FORCE - FCOUL(3) = FCOUL(3) + DC(3)*FORCE - endif - enddo + do m = -1,1 + do l = -1,1 + + Rb(1) = RX(J)+k*Lbox(1) + Rb(2) = RY(J)+m*Lbox(2) + Rb(3) = RZ(J)+l*Lbox(3) + Rab = Rb-Ra ! OBS b - a !!! + dR = norm2(Rab) + MAGR = dR + MAGR2 = dR*dR + + if ((dR <= COULCUT).and.(dR > 1e-12)) then + + TJ = TFACT*U(J) + DC = Rab/dR + + Z = abs(CALPHA*MAGR) + NUMREP_ERFC = erfc(Z) + + CA = NUMREP_ERFC/MAGR + COULOMBV = COULOMBV + DELTAQ(J)*CA + ccnt = ccnt + 1 + CA = CA + TWO*CALPHA*exp( -CALPHA2*MAGR2 )/SQRTPI + FORCE = -KECONST*DELTAQ(I)*DELTAQ(J)*CA/MAGR + EXPTI = exp(-TI*MAGR ) + + if (Element_Type(I).eq.Element_Type(J)) then + COULOMBV = COULOMBV - DELTAQ(J)*EXPTI*(SSB*MAGR2 + SSC*MAGR + SSD + SSE/MAGR) + ccnt = ccnt + 1 + FORCE = FORCE + (KECONST*DELTAQ(I)*DELTAQ(J)*EXPTI)*((SSE/MAGR2 - TWO*SSB*MAGR - SSC) & + + SSA*(SSB*MAGR2 + SSC*MAGR + SSD + SSE/MAGR)) + else + TJ2 = TJ*TJ + TJ3 = TJ2*TJ + TJ4 = TJ2*TJ2 + TJ6 = TJ4*TJ2 + EXPTJ = exp( -TJ*MAGR ) + TI2MTJ2 = TI2 - TJ2 + TJ2MTI2 = -TI2MTJ2 + SA = TI + SB = TJ4*TI/(TWO*TI2MTJ2*TI2MTJ2) + SC = (TJ6 - THREE*TJ4*TI2)/(TI2MTJ2*TI2MTJ2*TI2MTJ2) + SD = TJ + SE = TI4*TJ/(TWO * TJ2MTI2 * TJ2MTI2) + SF = (TI6 - THREE*TI4*TJ2)/(TJ2MTI2*TJ2MTI2*TJ2MTI2) + + COULOMBV = COULOMBV - (DELTAQ(J)*(EXPTI*(SB - (SC/MAGR)) + EXPTJ*(SE - (SF/MAGR)))) + FORCE = FORCE + KECONST*DELTAQ(I)*DELTAQ(J)*((EXPTI*(SA*(SB - (SC/MAGR)) - (SC/MAGR2))) & + + (EXPTJ*(SD*(SE - (SF/MAGR)) - (SF/MAGR2)))) + endif + + FCOUL(1) = FCOUL(1) + DC(1)*FORCE + FCOUL(2) = FCOUL(2) + DC(2)*FORCE + FCOUL(3) = FCOUL(3) + DC(3)*FORCE + endif + enddo + enddo enddo + + COULOMBV = KECONST*COULOMBV + + end subroutine Ewald_Real_Space_Single + + subroutine Ewald_Real_Space_latte(COULOMBV,I,RXYZ,Box, & + DELTAQ,U,Element_Pointer,Nr_atoms,COULACC,nebcoul,totnebcoul,HDIM,Max_Nr_Neigh,Nr_Elem) + + implicit none + + integer, parameter :: PREC = 8 + real(PREC), parameter :: ONE = 1.D0, TWO = 2.D0, ZERO = 0.D0, SIX = 6.D0, THREE = 3.D0 + real(PREC), parameter :: FOURTYEIGHT = 48.D0, ELEVEN = 11.D0, SIXTEEN = 16.D0 + real(PREC), parameter :: pi = 3.14159265358979323846264D0 + real(PREC), parameter :: SQRTPI = 1.772453850905516D0 + integer, intent(in) :: Nr_atoms, HDIM, Max_Nr_Neigh, I, Nr_Elem + real(PREC), intent(in) :: COULACC + real(PREC) :: TFACT, RELPERM, KECONST + real(PREC), intent(in) :: RXYZ(3,Nr_atoms), Box(3,3), DELTAQ(Nr_atoms) + real(PREC), intent(in) :: U(Nr_elem) + real(PREC) :: COULCUT, COULCUT2 + integer, intent(in) :: Element_Pointer(Nr_atoms) + integer, intent(in) :: totnebcoul(Nr_atoms), nebcoul(4,Max_Nr_Neigh,Nr_atoms) + real(PREC), intent(out) :: COULOMBV + real(PREC) :: Ra(3), Rb(3), dR, Rab(3) + real(PREC) :: COULVOL, SQRTX, CALPHA, DC(3), MAGR, MAGR2, Z + real(PREC) :: CALPHA2, TI,TI2,TI3,TI4,TI6,SSA,SSB,SSC,SSD,SSE + real(PREC) :: NUMREP_ERFC, CA, FORCE, EXPTI, EXPTJ + real(PREC) :: TJ,TJ2,TJ3,TJ4,TJ6,TI2MTJ2A,SA,SB,SC,SD,SE,SF + real(PREC) :: TI2MTJ2, TI2MTI2, TJ2MTI2 + integer :: J,K, ccnt, newj, PBCI,PBCJ,PBCK + + COULVOL = Box(1,1)*Box(2,2)*Box(3,3) + SQRTX = sqrt(-log(COULACC)) + + ccnt = 0 + COULCUT = 12.D0 + CALPHA = SQRTX/COULCUT + COULCUT2 = COULCUT*COULCUT + CALPHA2 = CALPHA*CALPHA + + RELPERM = ONE + KECONST = 14.3996437701414D0*RELPERM + TFACT = 16.0D0/(5.0D0*KECONST) + + COULOMBV = ZERO + + TI = TFACT*U(Element_Pointer(I)) + TI2 = TI*TI + TI3 = TI2*TI + TI4 = TI2*TI2 + TI6 = TI4*TI2 + + SSA = TI + SSB = TI3/48.D0 + SSC = 3.D0*TI2/16.D0 + SSD = 11.D0*TI/16.D0 + SSE = 1.D0 + + Ra(1) = RXYZ(1,I) + Ra(2) = RXYZ(2,I) + Ra(3) = RXYZ(3,I) + + do newj = 1,totnebcoul(I) + J = NEBCOUL(1, NEWJ, I) + PBCI = NEBCOUL(2, NEWJ, I) + PBCJ = NEBCOUL(3, NEWJ, I) + PBCK = NEBCOUL(4, NEWJ, I) + Rb(1) = RXYZ(1,J) + REAL(PBCI)*BOX(1,1) + REAL(PBCJ)*BOX(2,1) + & + REAL(PBCK)*BOX(3,1) + + Rb(2) = RXYZ(2,J) + REAL(PBCI)*BOX(1,2) + REAL(PBCJ)*BOX(2,2) + & + REAL(PBCK)*BOX(3,2) + + Rb(3) = RXYZ(3,J) + REAL(PBCI)*BOX(1,3) + REAL(PBCJ)*BOX(2,3) + & + REAL(PBCK)*BOX(3,3) + Rab = Rb-Ra ! OBS b - a !!! + dR = norm2(Rab) + MAGR = dR + MAGR2 = dR*dR + + if ((dR <= COULCUT).and.(dR > 1e-12)) then + + TJ = TFACT*U(Element_Pointer(J)) + DC = Rab/dR + + Z = abs(CALPHA*MAGR) + NUMREP_ERFC = erfc(Z) + + CA = NUMREP_ERFC/MAGR + COULOMBV = COULOMBV + DELTAQ(J)*CA + ccnt = ccnt + 1 + CA = CA + TWO*CALPHA*exp( -CALPHA2*MAGR2 )/SQRTPI + EXPTI = exp(-TI*MAGR ) + + if (Element_Pointer(I).eq.Element_Pointer(J)) then + COULOMBV = COULOMBV - DELTAQ(J)*EXPTI*(SSB*MAGR2 + SSC*MAGR + SSD + SSE/MAGR) + ccnt = ccnt + 1 + else + TJ2 = TJ*TJ + TJ3 = TJ2*TJ + TJ4 = TJ2*TJ2 + TJ6 = TJ4*TJ2 + EXPTJ = exp( -TJ*MAGR ) + TI2MTJ2 = TI2 - TJ2 + TJ2MTI2 = -TI2MTJ2 + SA = TI + SB = TJ4*TI/(TWO*TI2MTJ2*TI2MTJ2) + SC = (TJ6 - THREE*TJ4*TI2)/(TI2MTJ2*TI2MTJ2*TI2MTJ2) + SD = TJ + SE = TI4*TJ/(TWO * TJ2MTI2 * TJ2MTI2) + SF = (TI6 - THREE*TI4*TJ2)/(TJ2MTI2*TJ2MTI2*TJ2MTI2) + + COULOMBV = COULOMBV - (DELTAQ(J)*(EXPTI*(SB - (SC/MAGR)) + EXPTJ*(SE - (SF/MAGR)))) + endif + + endif enddo + COULOMBV = KECONST*COULOMBV + + end subroutine Ewald_Real_Space_latte + + subroutine Ewald_Real_Space(COULOMBV,FCOUL,I,RX,RY,RZ,LBox, & + DELTAQ,U,Element_Type,Nr_atoms,COULACC,TIMERATIO,nnRx,nnRy,nnRz,nrnnlist,nnType,HDIM,Max_Nr_Neigh) + + implicit none + + integer, parameter :: PREC = 8 + real(PREC), parameter :: ONE = 1.D0, TWO = 2.D0, ZERO = 0.D0, SIX = 6.D0, THREE = 3.D0 + real(PREC), parameter :: FOURTYEIGHT = 48.D0, ELEVEN = 11.D0, SIXTEEN = 16.D0 + real(PREC), parameter :: pi = 3.14159265358979323846264D0 + real(PREC), parameter :: SQRTPI = 1.772453850905516D0 + integer, intent(in) :: Nr_atoms, HDIM, Max_Nr_Neigh, I + real(PREC), intent(in) :: COULACC, TIMERATIO + real(PREC) :: TFACT, RELPERM, KECONST + real(PREC), intent(in) :: RX(Nr_atoms), RY(Nr_atoms), RZ(Nr_atoms), LBox(3), DELTAQ(Nr_atoms) + real(PREC), intent(in) :: U(Nr_atoms) + real(PREC) :: COULCUT, COULCUT2 + character(10), intent(in) :: Element_Type(Nr_atoms) + integer, intent(in) :: nrnnlist(Nr_atoms), nnType(Nr_atoms,Max_Nr_Neigh) + real(PREC), intent(in) :: nnRx(Nr_atoms,Max_Nr_Neigh) + real(PREC), intent(in) :: nnRy(Nr_atoms,Max_Nr_Neigh), nnRz(Nr_atoms,Max_Nr_Neigh) + real(PREC), intent(out) :: COULOMBV, FCOUL(3) + real(PREC) :: Ra(3), Rb(3), dR, Rab(3) + real(PREC) :: COULVOL, SQRTX, CALPHA, DC(3), MAGR, MAGR2, Z + real(PREC) :: CALPHA2, TI,TI2,TI3,TI4,TI6,SSA,SSB,SSC,SSD,SSE + real(PREC) :: NUMREP_ERFC, CA, FORCE, EXPTI, EXPTJ + real(PREC) :: TJ,TJ2,TJ3,TJ4,TJ6,TI2MTJ2A,SA,SB,SC,SD,SE,SF + real(PREC) :: TI2MTJ2, TI2MTI2, TJ2MTI2 + integer :: J,K, ccnt, nnI + + COULVOL = LBox(1)*LBox(2)*LBox(3) + SQRTX = sqrt(-log(COULACC)) + + ccnt = 0 + COULCUT = 12.D0 + CALPHA = SQRTX/COULCUT + COULCUT2 = COULCUT*COULCUT + CALPHA2 = CALPHA*CALPHA + + RELPERM = ONE + KECONST = 14.3996437701414D0*RELPERM + TFACT = 16.0D0/(5.0D0*KECONST) + + FCOUL = ZERO + COULOMBV = ZERO + + TI = TFACT*U(I) + TI2 = TI*TI + TI3 = TI2*TI + TI4 = TI2*TI2 + TI6 = TI4*TI2 + + SSA = TI + SSB = TI3/48.D0 + SSC = 3.D0*TI2/16.D0 + SSD = 11.D0*TI/16.D0 + SSE = 1.D0 + + Ra(1) = RX(I) + Ra(2) = RY(I) + Ra(3) = RZ(I) + + do nnI = 1,nrnnlist(I) + Rb(1) = nnRx(I,nnI) + Rb(2) = nnRy(I,nnI) + Rb(3) = nnRz(I,nnI) + J = nnType(I,nnI) + Rab = Rb-Ra ! OBS b - a !!! + dR = norm2(Rab) + MAGR = dR + MAGR2 = dR*dR + + if ((dR <= COULCUT).and.(dR > 1e-12)) then + + TJ = TFACT*U(J) + DC = Rab/dR + + ! Not Using Numerical Recipes ERFC + Z = abs(CALPHA*MAGR) + NUMREP_ERFC = erfc(Z) + + CA = NUMREP_ERFC/MAGR + COULOMBV = COULOMBV + DELTAQ(J)*CA + ccnt = ccnt + 1 + !TEST(ccnt) = DELTAQ(J)*CA + CA = CA + TWO*CALPHA*exp( -CALPHA2*MAGR2 )/SQRTPI + FORCE = -KECONST*DELTAQ(I)*DELTAQ(J)*CA/MAGR + EXPTI = exp(-TI*MAGR ) + + if (Element_Type(I).eq.Element_Type(J)) then + COULOMBV = COULOMBV - DELTAQ(J)*EXPTI*(SSB*MAGR2 + SSC*MAGR + SSD + SSE/MAGR) + ccnt = ccnt + 1 + !TEST(ccnt) = - DELTAQ(J)*EXPTI*(SSB*MAGR2 + SSC*MAGR + SSD + SSE/MAGR) + FORCE = FORCE + (KECONST*DELTAQ(I)*DELTAQ(J)*EXPTI)*((SSE/MAGR2 - TWO*SSB*MAGR - SSC) & + + SSA*(SSB*MAGR2 + SSC*MAGR + SSD + SSE/MAGR)) + else + TJ2 = TJ*TJ + TJ3 = TJ2*TJ + TJ4 = TJ2*TJ2 + TJ6 = TJ4*TJ2 + EXPTJ = exp( -TJ*MAGR ) + TI2MTJ2 = TI2 - TJ2 + TJ2MTI2 = -TI2MTJ2 + SA = TI + SB = TJ4*TI/(TWO*TI2MTJ2*TI2MTJ2) + SC = (TJ6 - THREE*TJ4*TI2)/(TI2MTJ2*TI2MTJ2*TI2MTJ2) + SD = TJ + SE = TI4*TJ/(TWO * TJ2MTI2 * TJ2MTI2) + SF = (TI6 - THREE*TI4*TJ2)/(TJ2MTI2*TJ2MTI2*TJ2MTI2) + + COULOMBV = COULOMBV - (DELTAQ(J)*(EXPTI*(SB - (SC/MAGR)) + EXPTJ*(SE - (SF/MAGR)))) + FORCE = FORCE + KECONST*DELTAQ(I)*DELTAQ(J)*((EXPTI*(SA*(SB - (SC/MAGR)) - (SC/MAGR2))) & + + (EXPTJ*(SD*(SE - (SF/MAGR)) - (SF/MAGR2)))) + endif -COULOMBV = KECONST*COULOMBV - -end subroutine Ewald_Real_Space_Single - -subroutine Ewald_Real_Space_latte(COULOMBV,I,RXYZ,Box, & - DELTAQ,U,Element_Pointer,Nr_atoms,COULACC,nebcoul,totnebcoul,HDIM,Max_Nr_Neigh,Nr_Elem) - -implicit none - -integer, parameter :: PREC = 8 -real(PREC), parameter :: ONE = 1.D0, TWO = 2.D0, ZERO = 0.D0, SIX = 6.D0, THREE = 3.D0 -real(PREC), parameter :: FOURTYEIGHT = 48.D0, ELEVEN = 11.D0, SIXTEEN = 16.D0 -real(PREC), parameter :: pi = 3.14159265358979323846264D0 -real(PREC), parameter :: SQRTPI = 1.772453850905516D0 -integer, intent(in) :: Nr_atoms, HDIM, Max_Nr_Neigh, I, Nr_Elem -real(PREC), intent(in) :: COULACC -real(PREC) :: TFACT, RELPERM, KECONST -real(PREC), intent(in) :: RXYZ(3,Nr_atoms), Box(3,3), DELTAQ(Nr_atoms) -real(PREC), intent(in) :: U(Nr_elem) -real(PREC) :: COULCUT, COULCUT2 -integer, intent(in) :: Element_Pointer(Nr_atoms) -integer, intent(in) :: totnebcoul(Nr_atoms), nebcoul(4,Max_Nr_Neigh,Nr_atoms) -real(PREC), intent(out) :: COULOMBV -real(PREC) :: Ra(3), Rb(3), dR, Rab(3) -real(PREC) :: COULVOL, SQRTX, CALPHA, DC(3), MAGR, MAGR2, Z -real(PREC) :: CALPHA2, TI,TI2,TI3,TI4,TI6,SSA,SSB,SSC,SSD,SSE -real(PREC) :: NUMREP_ERFC, CA, FORCE, EXPTI, EXPTJ -real(PREC) :: TJ,TJ2,TJ3,TJ4,TJ6,TI2MTJ2A,SA,SB,SC,SD,SE,SF -real(PREC) :: TI2MTJ2, TI2MTI2, TJ2MTI2 -integer :: J,K, ccnt, newj, PBCI,PBCJ,PBCK - -COULVOL = Box(1,1)*Box(2,2)*Box(3,3) -SQRTX = sqrt(-log(COULACC)) - -ccnt = 0 -COULCUT = 12.D0 -CALPHA = SQRTX/COULCUT -COULCUT2 = COULCUT*COULCUT -CALPHA2 = CALPHA*CALPHA - -RELPERM = ONE -KECONST = 14.3996437701414D0*RELPERM -TFACT = 16.0D0/(5.0D0*KECONST) - -COULOMBV = ZERO - -TI = TFACT*U(Element_Pointer(I)) -TI2 = TI*TI -TI3 = TI2*TI -TI4 = TI2*TI2 -TI6 = TI4*TI2 - -SSA = TI -SSB = TI3/48.D0 -SSC = 3.D0*TI2/16.D0 -SSD = 11.D0*TI/16.D0 -SSE = 1.D0 - -Ra(1) = RXYZ(1,I) -Ra(2) = RXYZ(2,I) -Ra(3) = RXYZ(3,I) - -do newj = 1,totnebcoul(I) - J = NEBCOUL(1, NEWJ, I) - PBCI = NEBCOUL(2, NEWJ, I) - PBCJ = NEBCOUL(3, NEWJ, I) - PBCK = NEBCOUL(4, NEWJ, I) - Rb(1) = RXYZ(1,J) + REAL(PBCI)*BOX(1,1) + REAL(PBCJ)*BOX(2,1) + & - REAL(PBCK)*BOX(3,1) - - Rb(2) = RXYZ(2,J) + REAL(PBCI)*BOX(1,2) + REAL(PBCJ)*BOX(2,2) + & - REAL(PBCK)*BOX(3,2) - - Rb(3) = RXYZ(3,J) + REAL(PBCI)*BOX(1,3) + REAL(PBCJ)*BOX(2,3) + & - REAL(PBCK)*BOX(3,3) - Rab = Rb-Ra ! OBS b - a !!! - dR = norm2(Rab) - MAGR = dR - MAGR2 = dR*dR - - if ((dR <= COULCUT).and.(dR > 1e-12)) then - - TJ = TFACT*U(Element_Pointer(J)) - DC = Rab/dR - - Z = abs(CALPHA*MAGR) - NUMREP_ERFC = erfc(Z) - - CA = NUMREP_ERFC/MAGR - COULOMBV = COULOMBV + DELTAQ(J)*CA - ccnt = ccnt + 1 - CA = CA + TWO*CALPHA*exp( -CALPHA2*MAGR2 )/SQRTPI - EXPTI = exp(-TI*MAGR ) - - if (Element_Pointer(I).eq.Element_Pointer(J)) then - COULOMBV = COULOMBV - DELTAQ(J)*EXPTI*(SSB*MAGR2 + SSC*MAGR + SSD + SSE/MAGR) - ccnt = ccnt + 1 - else - TJ2 = TJ*TJ - TJ3 = TJ2*TJ - TJ4 = TJ2*TJ2 - TJ6 = TJ4*TJ2 - EXPTJ = exp( -TJ*MAGR ) - TI2MTJ2 = TI2 - TJ2 - TJ2MTI2 = -TI2MTJ2 - SA = TI - SB = TJ4*TI/(TWO*TI2MTJ2*TI2MTJ2) - SC = (TJ6 - THREE*TJ4*TI2)/(TI2MTJ2*TI2MTJ2*TI2MTJ2) - SD = TJ - SE = TI4*TJ/(TWO * TJ2MTI2 * TJ2MTI2) - SF = (TI6 - THREE*TI4*TJ2)/(TJ2MTI2*TJ2MTI2*TJ2MTI2) - - COULOMBV = COULOMBV - (DELTAQ(J)*(EXPTI*(SB - (SC/MAGR)) + EXPTJ*(SE - (SF/MAGR)))) - endif - - endif -enddo -COULOMBV = KECONST*COULOMBV - -end subroutine Ewald_Real_Space_latte - -subroutine Ewald_Real_Space(COULOMBV,FCOUL,I,RX,RY,RZ,LBox, & - DELTAQ,U,Element_Type,Nr_atoms,COULACC,TIMERATIO,nnRx,nnRy,nnRz,nrnnlist,nnType,HDIM,Max_Nr_Neigh) - -implicit none - -integer, parameter :: PREC = 8 -real(PREC), parameter :: ONE = 1.D0, TWO = 2.D0, ZERO = 0.D0, SIX = 6.D0, THREE = 3.D0 -real(PREC), parameter :: FOURTYEIGHT = 48.D0, ELEVEN = 11.D0, SIXTEEN = 16.D0 -real(PREC), parameter :: pi = 3.14159265358979323846264D0 -real(PREC), parameter :: SQRTPI = 1.772453850905516D0 -integer, intent(in) :: Nr_atoms, HDIM, Max_Nr_Neigh, I -real(PREC), intent(in) :: COULACC, TIMERATIO -real(PREC) :: TFACT, RELPERM, KECONST -real(PREC), intent(in) :: RX(Nr_atoms), RY(Nr_atoms), RZ(Nr_atoms), LBox(3), DELTAQ(Nr_atoms) -real(PREC), intent(in) :: U(Nr_atoms) -real(PREC) :: COULCUT, COULCUT2 -character(10), intent(in) :: Element_Type(Nr_atoms) -integer, intent(in) :: nrnnlist(Nr_atoms), nnType(Nr_atoms,Max_Nr_Neigh) -real(PREC), intent(in) :: nnRx(Nr_atoms,Max_Nr_Neigh) -real(PREC), intent(in) :: nnRy(Nr_atoms,Max_Nr_Neigh), nnRz(Nr_atoms,Max_Nr_Neigh) -real(PREC), intent(out) :: COULOMBV, FCOUL(3) -real(PREC) :: Ra(3), Rb(3), dR, Rab(3) -real(PREC) :: COULVOL, SQRTX, CALPHA, DC(3), MAGR, MAGR2, Z -real(PREC) :: CALPHA2, TI,TI2,TI3,TI4,TI6,SSA,SSB,SSC,SSD,SSE -real(PREC) :: NUMREP_ERFC, CA, FORCE, EXPTI, EXPTJ -real(PREC) :: TJ,TJ2,TJ3,TJ4,TJ6,TI2MTJ2A,SA,SB,SC,SD,SE,SF -real(PREC) :: TI2MTJ2, TI2MTI2, TJ2MTI2 -integer :: J,K, ccnt, nnI - -COULVOL = LBox(1)*LBox(2)*LBox(3) -SQRTX = sqrt(-log(COULACC)) - -ccnt = 0 -COULCUT = 12.D0 -CALPHA = SQRTX/COULCUT -COULCUT2 = COULCUT*COULCUT -CALPHA2 = CALPHA*CALPHA - -RELPERM = ONE -KECONST = 14.3996437701414D0*RELPERM -TFACT = 16.0D0/(5.0D0*KECONST) - -FCOUL = ZERO -COULOMBV = ZERO - -TI = TFACT*U(I) -TI2 = TI*TI -TI3 = TI2*TI -TI4 = TI2*TI2 -TI6 = TI4*TI2 - -SSA = TI -SSB = TI3/48.D0 -SSC = 3.D0*TI2/16.D0 -SSD = 11.D0*TI/16.D0 -SSE = 1.D0 - -Ra(1) = RX(I) -Ra(2) = RY(I) -Ra(3) = RZ(I) - -do nnI = 1,nrnnlist(I) - Rb(1) = nnRx(I,nnI) - Rb(2) = nnRy(I,nnI) - Rb(3) = nnRz(I,nnI) - J = nnType(I,nnI) - Rab = Rb-Ra ! OBS b - a !!! - dR = norm2(Rab) - MAGR = dR - MAGR2 = dR*dR - - if ((dR <= COULCUT).and.(dR > 1e-12)) then - - TJ = TFACT*U(J) - DC = Rab/dR - - ! Not Using Numerical Recipes ERFC - Z = abs(CALPHA*MAGR) - NUMREP_ERFC = erfc(Z) - - CA = NUMREP_ERFC/MAGR - COULOMBV = COULOMBV + DELTAQ(J)*CA - ccnt = ccnt + 1 - !TEST(ccnt) = DELTAQ(J)*CA - CA = CA + TWO*CALPHA*exp( -CALPHA2*MAGR2 )/SQRTPI - FORCE = -KECONST*DELTAQ(I)*DELTAQ(J)*CA/MAGR - EXPTI = exp(-TI*MAGR ) - - if (Element_Type(I).eq.Element_Type(J)) then - COULOMBV = COULOMBV - DELTAQ(J)*EXPTI*(SSB*MAGR2 + SSC*MAGR + SSD + SSE/MAGR) - ccnt = ccnt + 1 - !TEST(ccnt) = - DELTAQ(J)*EXPTI*(SSB*MAGR2 + SSC*MAGR + SSD + SSE/MAGR) - FORCE = FORCE + (KECONST*DELTAQ(I)*DELTAQ(J)*EXPTI)*((SSE/MAGR2 - TWO*SSB*MAGR - SSC) & - + SSA*(SSB*MAGR2 + SSC*MAGR + SSD + SSE/MAGR)) - else - TJ2 = TJ*TJ - TJ3 = TJ2*TJ - TJ4 = TJ2*TJ2 - TJ6 = TJ4*TJ2 - EXPTJ = exp( -TJ*MAGR ) - TI2MTJ2 = TI2 - TJ2 - TJ2MTI2 = -TI2MTJ2 - SA = TI - SB = TJ4*TI/(TWO*TI2MTJ2*TI2MTJ2) - SC = (TJ6 - THREE*TJ4*TI2)/(TI2MTJ2*TI2MTJ2*TI2MTJ2) - SD = TJ - SE = TI4*TJ/(TWO * TJ2MTI2 * TJ2MTI2) - SF = (TI6 - THREE*TI4*TJ2)/(TJ2MTI2*TJ2MTI2*TJ2MTI2) - - COULOMBV = COULOMBV - (DELTAQ(J)*(EXPTI*(SB - (SC/MAGR)) + EXPTJ*(SE - (SF/MAGR)))) - FORCE = FORCE + KECONST*DELTAQ(I)*DELTAQ(J)*((EXPTI*(SA*(SB - (SC/MAGR)) - (SC/MAGR2))) & - + (EXPTJ*(SD*(SE - (SF/MAGR)) - (SF/MAGR2)))) - endif - - FCOUL(1) = FCOUL(1) + DC(1)*FORCE - FCOUL(2) = FCOUL(2) + DC(2)*FORCE - FCOUL(3) = FCOUL(3) + DC(3)*FORCE - endif -enddo -COULOMBV = KECONST*COULOMBV - -end subroutine Ewald_Real_Space - -subroutine Ewald_k_Space_latte(COULOMBV,RXYZ,Box,DELTAQ,Nr_atoms,COULACC,Max_Nr_Neigh) - -implicit none - -integer, parameter :: PREC = 8 -real(PREC), parameter :: ONE = 1.D0, TWO = 2.D0, ZERO = 0.D0, SIX = 6.D0, THREE = 3.D0, FOUR = 4.D0 -real(PREC), parameter :: FOURTYEIGHT = 48.D0, ELEVEN = 11.D0, SIXTEEN = 16.D0, EIGHT = 8.D0 -real(PREC), parameter :: pi = 3.14159265358979323846264D0 -real(PREC), parameter :: SQRTPI = 1.772453850905516D0 -integer, intent(in) :: Nr_atoms, Max_Nr_Neigh -real(PREC), intent(in) :: COULACC -real(PREC) :: KECONST, TFACT, RELPERM -real(PREC), intent(in) :: RXYZ(3,Nr_atoms), Box(3,3), DELTAQ(Nr_atoms) -real(PREC) :: COULCUT, COULCUT2 -real(PREC), intent(out) :: COULOMBV(Nr_atoms) -real(PREC) :: Ra(3), Rb(3), dR, Rab(3) -real(PREC) :: COULVOL, SQRTX, CALPHA, DC(3), MAGR, MAGR2, Z -real(PREC) :: CALPHA2, TI,TI2,TI3,TI4,TI6,SSA,SSB,SSC,SSD,SSE -real(PREC) :: CORRFACT,FOURCALPHA2, FORCE -real(PREC) :: RECIPVECS(3,3),SINLIST(Nr_atoms),COSLIST(Nr_Atoms) -real(PREC) :: K(3),L11,L12,L13,M21,M22,M23,K2,KCUTOFF,KCUTOFF2,PREFACTOR -real(PREC) :: PREVIR, COSSUM,SINSUM,DOT, KEPREF, COSSUM2, SINSUM2 - -integer :: I,J,L,M,N, ccnt, nnI, LMAX,MMAX,NMAX,NMIN,MMIN - -COULVOL = Box(1,1)*Box(2,2)*Box(3,3) -SQRTX = sqrt(-log(COULACC)) - -ccnt = 0 - -COULCUT = 12.0D0 -CALPHA = SQRTX/COULCUT - -COULCUT2 = COULCUT*COULCUT -KCUTOFF = TWO*CALPHA*SQRTX -KCUTOFF2 = KCUTOFF*KCUTOFF -CALPHA2 = CALPHA*CALPHA -FOURCALPHA2 = FOUR*CALPHA2 - -RECIPVECS = ZERO -RECIPVECS(1,1) = TWO*pi/Box(1,1) -RECIPVECS(2,2) = TWO*pi/Box(2,2) -RECIPVECS(3,3) = TWO*pi/Box(3,3) -LMAX = floor(KCUTOFF / sqrt(RECIPVECS(1,1)*RECIPVECS(1,1))) -MMAX = floor(KCUTOFF / sqrt(RECIPVECS(2,2)*RECIPVECS(2,2))) -NMAX = floor(KCUTOFF / sqrt(RECIPVECS(3,3)*RECIPVECS(3,3))) - -RELPERM = 1.D0 -KECONST = 14.3996437701414D0*RELPERM - -COULOMBV = ZERO -SINLIST = ZERO -COSLIST = ZERO - -do L = 0,LMAX - - if (L.eq.0) then - MMIN = 0 - else - MMIN = -MMAX - endif - - L11 = L*RECIPVECS(1,1) - L12 = L*RECIPVECS(1,2) - L13 = L*RECIPVECS(1,3) - - do M = MMIN,MMAX + FCOUL(1) = FCOUL(1) + DC(1)*FORCE + FCOUL(2) = FCOUL(2) + DC(2)*FORCE + FCOUL(3) = FCOUL(3) + DC(3)*FORCE + endif + enddo + COULOMBV = KECONST*COULOMBV + + end subroutine Ewald_Real_Space + + subroutine Ewald_k_Space_latte(COULOMBV,RXYZ,Box,DELTAQ,Nr_atoms,COULACC,Max_Nr_Neigh) + + implicit none + + integer, parameter :: PREC = 8 + real(PREC), parameter :: ONE = 1.D0, TWO = 2.D0, ZERO = 0.D0, SIX = 6.D0, THREE = 3.D0, FOUR = 4.D0 + real(PREC), parameter :: FOURTYEIGHT = 48.D0, ELEVEN = 11.D0, SIXTEEN = 16.D0, EIGHT = 8.D0 + real(PREC), parameter :: pi = 3.14159265358979323846264D0 + real(PREC), parameter :: SQRTPI = 1.772453850905516D0 + integer, intent(in) :: Nr_atoms, Max_Nr_Neigh + real(PREC), intent(in) :: COULACC + real(PREC) :: KECONST, TFACT, RELPERM + real(PREC), intent(in) :: RXYZ(3,Nr_atoms), Box(3,3), DELTAQ(Nr_atoms) + real(PREC) :: COULCUT, COULCUT2 + real(PREC), intent(out) :: COULOMBV(Nr_atoms) + real(PREC) :: Ra(3), Rb(3), dR, Rab(3) + real(PREC) :: COULVOL, SQRTX, CALPHA, DC(3), MAGR, MAGR2, Z + real(PREC) :: CALPHA2, TI,TI2,TI3,TI4,TI6,SSA,SSB,SSC,SSD,SSE + real(PREC) :: CORRFACT,FOURCALPHA2, FORCE + real(PREC) :: RECIPVECS(3,3),SINLIST(Nr_atoms),COSLIST(Nr_Atoms) + real(PREC) :: K(3),L11,L12,L13,M21,M22,M23,K2,KCUTOFF,KCUTOFF2,PREFACTOR + real(PREC) :: PREVIR, COSSUM,SINSUM,DOT, KEPREF, COSSUM2, SINSUM2 + + integer :: I,J,L,M,N, ccnt, nnI, LMAX,MMAX,NMAX,NMIN,MMIN + + COULVOL = Box(1,1)*Box(2,2)*Box(3,3) + SQRTX = sqrt(-log(COULACC)) + + ccnt = 0 + + COULCUT = 12.0D0 + CALPHA = SQRTX/COULCUT + + COULCUT2 = COULCUT*COULCUT + KCUTOFF = TWO*CALPHA*SQRTX + KCUTOFF2 = KCUTOFF*KCUTOFF + CALPHA2 = CALPHA*CALPHA + FOURCALPHA2 = FOUR*CALPHA2 + + RECIPVECS = ZERO + RECIPVECS(1,1) = TWO*pi/Box(1,1) + RECIPVECS(2,2) = TWO*pi/Box(2,2) + RECIPVECS(3,3) = TWO*pi/Box(3,3) + LMAX = floor(KCUTOFF / sqrt(RECIPVECS(1,1)*RECIPVECS(1,1))) + MMAX = floor(KCUTOFF / sqrt(RECIPVECS(2,2)*RECIPVECS(2,2))) + NMAX = floor(KCUTOFF / sqrt(RECIPVECS(3,3)*RECIPVECS(3,3))) + + RELPERM = 1.D0 + KECONST = 14.3996437701414D0*RELPERM + + COULOMBV = ZERO + SINLIST = ZERO + COSLIST = ZERO + + do L = 0,LMAX + + if (L.eq.0) then + MMIN = 0 + else + MMIN = -MMAX + endif + + L11 = L*RECIPVECS(1,1) + L12 = L*RECIPVECS(1,2) + L13 = L*RECIPVECS(1,3) + + do M = MMIN,MMAX NMIN = -NMAX if ((L==0).and.(M==0)) then @@ -575,122 +575,122 @@ subroutine Ewald_k_Space_latte(COULOMBV,RXYZ,Box,DELTAQ,Nr_atoms,COULACC,Max_Nr_ M23 = L13 + M*RECIPVECS(2,3) do N = NMIN,NMAX - K(1) = M21 + N*RECIPVECS(3,1) - K(2) = M22 + N*RECIPVECS(3,2) - K(3) = M23 + N*RECIPVECS(3,3) - K2 = K(1)*K(1) + K(2)*K(2) + K(3)*K(3) - if (K2.le.KCUTOFF2) then - PREFACTOR = EIGHT*pi*exp(-K2/(4.D0*CALPHA2))/(COULVOL*K2) - PREVIR = (2.D0/K2) + (2.D0/(4.D0*CALPHA2)); - - COSSUM = 0.D0 - SINSUM = 0.D0 - - ! Doing the sin and cos sums - !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(I,DOT) & - !$OMP REDUCTION(+:COSSUM) & - !$OMP REDUCTION(+:SINSUM) - do I = 1,Nr_atoms - DOT = K(1)*RXYZ(1,I) + K(2)*RXYZ(2,I) + K(3)*RXYZ(3,I) - ! We re-use these in the next loop... - SINLIST(I) = sin(DOT) - COSLIST(I) = cos(DOT) - COSSUM = COSSUM + DELTAQ(I)*COSLIST(I) - SINSUM = SINSUM + DELTAQ(I)*SINLIST(I) - enddo - !$OMP END PARALLEL DO - COSSUM2 = COSSUM*COSSUM - SINSUM2 = SINSUM*SINSUM - - ! Add up energy and force contributions - - KEPREF = KECONST*PREFACTOR - !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(I) - do I = 1,Nr_atoms - COULOMBV(I) = COULOMBV(I) + KEPREF*(COSLIST(I)*COSSUM + SINLIST(I)*SINSUM) - enddo - !$OMP END PARALLEL DO - - KEPREF = KEPREF*(COSSUM2 + SINSUM2) - endif + K(1) = M21 + N*RECIPVECS(3,1) + K(2) = M22 + N*RECIPVECS(3,2) + K(3) = M23 + N*RECIPVECS(3,3) + K2 = K(1)*K(1) + K(2)*K(2) + K(3)*K(3) + if (K2.le.KCUTOFF2) then + PREFACTOR = EIGHT*pi*exp(-K2/(4.D0*CALPHA2))/(COULVOL*K2) + PREVIR = (2.D0/K2) + (2.D0/(4.D0*CALPHA2)); + + COSSUM = 0.D0 + SINSUM = 0.D0 + + ! Doing the sin and cos sums + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(I,DOT) & + !$OMP REDUCTION(+:COSSUM) & + !$OMP REDUCTION(+:SINSUM) + do I = 1,Nr_atoms + DOT = K(1)*RXYZ(1,I) + K(2)*RXYZ(2,I) + K(3)*RXYZ(3,I) + ! We re-use these in the next loop... + SINLIST(I) = sin(DOT) + COSLIST(I) = cos(DOT) + COSSUM = COSSUM + DELTAQ(I)*COSLIST(I) + SINSUM = SINSUM + DELTAQ(I)*SINLIST(I) + enddo + !$OMP END PARALLEL DO + COSSUM2 = COSSUM*COSSUM + SINSUM2 = SINSUM*SINSUM + + ! Add up energy and force contributions + + KEPREF = KECONST*PREFACTOR + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(I) + do I = 1,Nr_atoms + COULOMBV(I) = COULOMBV(I) + KEPREF*(COSLIST(I)*COSSUM + SINLIST(I)*SINSUM) + enddo + !$OMP END PARALLEL DO + + KEPREF = KEPREF*(COSSUM2 + SINSUM2) + endif enddo - enddo -enddo - -! Point self energy -CORRFACT = 2.D0*KECONST*CALPHA/SQRTPI; -COULOMBV = COULOMBV - CORRFACT*DELTAQ; - -end subroutine Ewald_k_Space_latte - -subroutine Ewald_k_Space(COULOMBV,FCOUL,RX,RY,RZ,LBox,DELTAQ,Nr_atoms,COULACC,TIMERATIO,Max_Nr_Neigh) - -implicit none - -integer, parameter :: PREC = 8 -real(PREC), parameter :: ONE = 1.D0, TWO = 2.D0, ZERO = 0.D0, SIX = 6.D0, THREE = 3.D0, FOUR = 4.D0 -real(PREC), parameter :: FOURTYEIGHT = 48.D0, ELEVEN = 11.D0, SIXTEEN = 16.D0, EIGHT = 8.D0 -real(PREC), parameter :: pi = 3.14159265358979323846264D0 -real(PREC), parameter :: SQRTPI = 1.772453850905516D0 -integer, intent(in) :: Nr_atoms, Max_Nr_Neigh -real(PREC), intent(in) :: COULACC, TIMERATIO -real(PREC) :: KECONST, TFACT, RELPERM -real(PREC), intent(in) :: RX(Nr_atoms), RY(Nr_atoms), RZ(Nr_atoms), LBox(3), DELTAQ(Nr_atoms) -real(PREC) :: COULCUT, COULCUT2 -real(PREC), intent(out) :: COULOMBV(Nr_atoms), FCOUL(3,Nr_atoms) -real(PREC) :: Ra(3), Rb(3), dR, Rab(3) -real(PREC) :: COULVOL, SQRTX, CALPHA, DC(3), MAGR, MAGR2, Z -real(PREC) :: CALPHA2, TI,TI2,TI3,TI4,TI6,SSA,SSB,SSC,SSD,SSE -real(PREC) :: CORRFACT,FOURCALPHA2, FORCE -real(PREC) :: RECIPVECS(3,3),SINLIST(Nr_atoms),COSLIST(Nr_Atoms) -real(PREC) :: K(3),L11,L12,L13,M21,M22,M23,K2,KCUTOFF,KCUTOFF2,PREFACTOR -real(PREC) :: PREVIR, COSSUM,SINSUM,DOT, KEPREF, COSSUM2, SINSUM2 - -integer :: I,J,L,M,N, ccnt, nnI, LMAX,MMAX,NMAX,NMIN,MMIN - -COULVOL = LBox(1)*LBox(2)*LBox(3) -SQRTX = sqrt(-log(COULACC)) - -ccnt = 0 - -COULCUT = 12.0D0 -CALPHA = SQRTX/COULCUT - -COULCUT2 = COULCUT*COULCUT -KCUTOFF = TWO*CALPHA*SQRTX -KCUTOFF2 = KCUTOFF*KCUTOFF -CALPHA2 = CALPHA*CALPHA -FOURCALPHA2 = FOUR*CALPHA2 - -RECIPVECS = ZERO -RECIPVECS(1,1) = TWO*pi/LBox(1) -RECIPVECS(2,2) = TWO*pi/LBox(2) -RECIPVECS(3,3) = TWO*pi/LBox(3) -LMAX = floor(KCUTOFF / sqrt(RECIPVECS(1,1)*RECIPVECS(1,1))) -MMAX = floor(KCUTOFF / sqrt(RECIPVECS(2,2)*RECIPVECS(2,2))) -NMAX = floor(KCUTOFF / sqrt(RECIPVECS(3,3)*RECIPVECS(3,3))) - -RELPERM = 1.D0 -KECONST = 14.3996437701414D0*RELPERM - -FCOUL = ZERO -COULOMBV = ZERO -SINLIST = ZERO -COSLIST = ZERO - -do L = 0,LMAX - - if (L.eq.0) then - MMIN = 0 - else - MMIN = -MMAX - endif - - L11 = L*RECIPVECS(1,1) - L12 = L*RECIPVECS(1,2) - L13 = L*RECIPVECS(1,3) - - do M = MMIN,MMAX + enddo + enddo + + ! Point self energy + CORRFACT = 2.D0*KECONST*CALPHA/SQRTPI; + COULOMBV = COULOMBV - CORRFACT*DELTAQ; + + end subroutine Ewald_k_Space_latte + + subroutine Ewald_k_Space(COULOMBV,FCOUL,RX,RY,RZ,LBox,DELTAQ,Nr_atoms,COULACC,TIMERATIO,Max_Nr_Neigh) + + implicit none + + integer, parameter :: PREC = 8 + real(PREC), parameter :: ONE = 1.D0, TWO = 2.D0, ZERO = 0.D0, SIX = 6.D0, THREE = 3.D0, FOUR = 4.D0 + real(PREC), parameter :: FOURTYEIGHT = 48.D0, ELEVEN = 11.D0, SIXTEEN = 16.D0, EIGHT = 8.D0 + real(PREC), parameter :: pi = 3.14159265358979323846264D0 + real(PREC), parameter :: SQRTPI = 1.772453850905516D0 + integer, intent(in) :: Nr_atoms, Max_Nr_Neigh + real(PREC), intent(in) :: COULACC, TIMERATIO + real(PREC) :: KECONST, TFACT, RELPERM + real(PREC), intent(in) :: RX(Nr_atoms), RY(Nr_atoms), RZ(Nr_atoms), LBox(3), DELTAQ(Nr_atoms) + real(PREC) :: COULCUT, COULCUT2 + real(PREC), intent(out) :: COULOMBV(Nr_atoms), FCOUL(3,Nr_atoms) + real(PREC) :: Ra(3), Rb(3), dR, Rab(3) + real(PREC) :: COULVOL, SQRTX, CALPHA, DC(3), MAGR, MAGR2, Z + real(PREC) :: CALPHA2, TI,TI2,TI3,TI4,TI6,SSA,SSB,SSC,SSD,SSE + real(PREC) :: CORRFACT,FOURCALPHA2, FORCE + real(PREC) :: RECIPVECS(3,3),SINLIST(Nr_atoms),COSLIST(Nr_Atoms) + real(PREC) :: K(3),L11,L12,L13,M21,M22,M23,K2,KCUTOFF,KCUTOFF2,PREFACTOR + real(PREC) :: PREVIR, COSSUM,SINSUM,DOT, KEPREF, COSSUM2, SINSUM2 + + integer :: I,J,L,M,N, ccnt, nnI, LMAX,MMAX,NMAX,NMIN,MMIN + + COULVOL = LBox(1)*LBox(2)*LBox(3) + SQRTX = sqrt(-log(COULACC)) + + ccnt = 0 + + COULCUT = 12.0D0 + CALPHA = SQRTX/COULCUT + + COULCUT2 = COULCUT*COULCUT + KCUTOFF = TWO*CALPHA*SQRTX + KCUTOFF2 = KCUTOFF*KCUTOFF + CALPHA2 = CALPHA*CALPHA + FOURCALPHA2 = FOUR*CALPHA2 + + RECIPVECS = ZERO + RECIPVECS(1,1) = TWO*pi/LBox(1) + RECIPVECS(2,2) = TWO*pi/LBox(2) + RECIPVECS(3,3) = TWO*pi/LBox(3) + LMAX = floor(KCUTOFF / sqrt(RECIPVECS(1,1)*RECIPVECS(1,1))) + MMAX = floor(KCUTOFF / sqrt(RECIPVECS(2,2)*RECIPVECS(2,2))) + NMAX = floor(KCUTOFF / sqrt(RECIPVECS(3,3)*RECIPVECS(3,3))) + + RELPERM = 1.D0 + KECONST = 14.3996437701414D0*RELPERM + + FCOUL = ZERO + COULOMBV = ZERO + SINLIST = ZERO + COSLIST = ZERO + + do L = 0,LMAX + + if (L.eq.0) then + MMIN = 0 + else + MMIN = -MMAX + endif + + L11 = L*RECIPVECS(1,1) + L12 = L*RECIPVECS(1,2) + L13 = L*RECIPVECS(1,3) + + do M = MMIN,MMAX NMIN = -NMAX if ((L==0).and.(M==0)) then @@ -702,50 +702,50 @@ subroutine Ewald_k_Space(COULOMBV,FCOUL,RX,RY,RZ,LBox,DELTAQ,Nr_atoms,COULACC,TI M23 = L13 + M*RECIPVECS(2,3) do N = NMIN,NMAX - K(1) = M21 + N*RECIPVECS(3,1) - K(2) = M22 + N*RECIPVECS(3,2) - K(3) = M23 + N*RECIPVECS(3,3) - K2 = K(1)*K(1) + K(2)*K(2) + K(3)*K(3) - if (K2.le.KCUTOFF2) then - PREFACTOR = EIGHT*pi*exp(-K2/(4.D0*CALPHA2))/(COULVOL*K2) - PREVIR = (2.D0/K2) + (2.D0/(4.D0*CALPHA2)); - - COSSUM = 0.D0 - SINSUM = 0.D0 - - ! Doing the sin and cos sums - do I = 1,Nr_atoms - DOT = K(1)*RX(I) + K(2)*RY(I) + K(3)*RZ(I) - ! We re-use these in the next loop... - SINLIST(I) = sin(DOT) - COSLIST(I) = cos(DOT) - COSSUM = COSSUM + DELTAQ(I)*COSLIST(I) - SINSUM = SINSUM + DELTAQ(I)*SINLIST(I) - enddo - COSSUM2 = COSSUM*COSSUM - SINSUM2 = SINSUM*SINSUM - - ! Add up energy and force contributions - - KEPREF = KECONST*PREFACTOR - do I = 1,Nr_atoms - COULOMBV(I) = COULOMBV(I) + KEPREF*(COSLIST(I)*COSSUM + SINLIST(I)*SINSUM) - FORCE = KEPREF*DELTAQ(I)*(SINLIST(I)*COSSUM - COSLIST(I)*SINSUM) - FCOUL(1,I) = FCOUL(1,I) + FORCE*K(1) - FCOUL(2,I) = FCOUL(2,I) + FORCE*K(2) - FCOUL(3,I) = FCOUL(3,I) + FORCE*K(3) - enddo - - KEPREF = KEPREF*(COSSUM2 + SINSUM2) - endif + K(1) = M21 + N*RECIPVECS(3,1) + K(2) = M22 + N*RECIPVECS(3,2) + K(3) = M23 + N*RECIPVECS(3,3) + K2 = K(1)*K(1) + K(2)*K(2) + K(3)*K(3) + if (K2.le.KCUTOFF2) then + PREFACTOR = EIGHT*pi*exp(-K2/(4.D0*CALPHA2))/(COULVOL*K2) + PREVIR = (2.D0/K2) + (2.D0/(4.D0*CALPHA2)); + + COSSUM = 0.D0 + SINSUM = 0.D0 + + ! Doing the sin and cos sums + do I = 1,Nr_atoms + DOT = K(1)*RX(I) + K(2)*RY(I) + K(3)*RZ(I) + ! We re-use these in the next loop... + SINLIST(I) = sin(DOT) + COSLIST(I) = cos(DOT) + COSSUM = COSSUM + DELTAQ(I)*COSLIST(I) + SINSUM = SINSUM + DELTAQ(I)*SINLIST(I) + enddo + COSSUM2 = COSSUM*COSSUM + SINSUM2 = SINSUM*SINSUM + + ! Add up energy and force contributions + + KEPREF = KECONST*PREFACTOR + do I = 1,Nr_atoms + COULOMBV(I) = COULOMBV(I) + KEPREF*(COSLIST(I)*COSSUM + SINLIST(I)*SINSUM) + FORCE = KEPREF*DELTAQ(I)*(SINLIST(I)*COSSUM - COSLIST(I)*SINSUM) + FCOUL(1,I) = FCOUL(1,I) + FORCE*K(1) + FCOUL(2,I) = FCOUL(2,I) + FORCE*K(2) + FCOUL(3,I) = FCOUL(3,I) + FORCE*K(3) + enddo + + KEPREF = KEPREF*(COSSUM2 + SINSUM2) + endif enddo - enddo -enddo + enddo + enddo -! Point self energy -CORRFACT = 2.D0*KECONST*CALPHA/SQRTPI; -COULOMBV = COULOMBV - CORRFACT*DELTAQ; + ! Point self energy + CORRFACT = 2.D0*KECONST*CALPHA/SQRTPI; + COULOMBV = COULOMBV - CORRFACT*DELTAQ; -end subroutine Ewald_k_Space + end subroutine Ewald_k_Space end module prg_ewald_mod diff --git a/src/prg_implicit_fermi_mod.F90 b/src/prg_implicit_fermi_mod.F90 index 2e217a98..76bd2bc2 100644 --- a/src/prg_implicit_fermi_mod.F90 +++ b/src/prg_implicit_fermi_mod.F90 @@ -30,7 +30,7 @@ module prg_implicit_fermi_mod contains !> Recursive Implicit Fermi Dirac for finite temperature. - !! \param Inv_bml Inverses generated by algorithm. + !! \param Inv_bml Inverses generated by algorithm. !! \param h_bml Input Hamiltonian matrix. !! \param p_bml Output density matrix. !! \param nsteps Number of recursion steps. @@ -40,8 +40,8 @@ module prg_implicit_fermi_mod !! \param occErrLimit Occupation error limit. !! \param threshold Threshold for multiplication. !! \param tol Tolerance for linear system solver. - !! \param SCF_IT The current SCF iteration. - !! \param occiter Counts the total nr of DM calculations during MD. + !! \param SCF_IT The current SCF iteration. + !! \param occiter Counts the total nr of DM calculations during MD. !! See \cite{niklasson2003} subroutine prg_implicit_fermi_save_inverse(Inv_bml, h_bml, p_bml, nsteps, nocc, & mu, beta, occErrLimit, threshold, tol,SCF_IT, occiter) @@ -58,8 +58,8 @@ subroutine prg_implicit_fermi_save_inverse(Inv_bml, h_bml, p_bml, nsteps, nocc, integer, intent(inout) :: occiter type(bml_matrix_t) :: w_bml, y_bml, d_bml, aux_bml, p2_bml, I_bml, ai_bml - real(dp) :: trdPdmu, trP0, occErr, alpha - real(dp) :: cnst, ofactor, mustep + real(dp) :: trdPdmu, trP0, occErr, alpha + real(dp) :: cnst, ofactor, mustep real(dp), allocatable :: trace(:), gbnd(:) character(20) :: bml_type integer :: N, M, i, iter, muadj, prev @@ -75,7 +75,7 @@ subroutine prg_implicit_fermi_save_inverse(Inv_bml, h_bml, p_bml, nsteps, nocc, call bml_zero_matrix(bml_type, bml_element_real, dp, N, M, aux_bml) call bml_zero_matrix(bml_type, bml_element_real, dp, N, M, y_bml) call bml_identity_matrix(bml_type, bml_element_real, dp, N, M, I_bml) - call bml_identity_matrix(bml_type, bml_element_real, dp, N, M, ai_bml) + call bml_identity_matrix(bml_type, bml_element_real, dp, N, M, ai_bml) occErr = 10.0_dp alpha = 1.0_dp @@ -97,10 +97,10 @@ subroutine prg_implicit_fermi_save_inverse(Inv_bml, h_bml, p_bml, nsteps, nocc, call prg_conjgrad(y_bml, ai_bml, I_bml, aux_bml, d_bml, w_bml, 0.0001_dp, threshold) else ! Otherwise use previous inverse as starting guess - call bml_copy(Inv_bml(1),ai_bml) + call bml_copy(Inv_bml(1),ai_bml) end if - do while (occErr .gt. occErrLimit .or. muadj .eq. 1) + do while ((occErr .gt. occErrLimit .or. muadj .eq. 1) .and. iter < 50) iter = iter + 1 muadj = 0 write(*,*) 'mu =', mu @@ -109,18 +109,18 @@ subroutine prg_implicit_fermi_save_inverse(Inv_bml, h_bml, p_bml, nsteps, nocc, call bml_copy(h_bml, p_bml) call prg_normalize_implicit_fermi(p_bml, cnst, mu) - do i = 1, nsteps - call bml_multiply_x2(p_bml, p2_bml, threshold, trace) - ! Y = 2*(P2-P) + I - call bml_copy(p2_bml, y_bml) - call bml_add(y_bml, p_bml, 1.0_dp, -1.0_dp, threshold) - call bml_scale_add_identity(y_bml, 2.0_dp, 1.0_dp, threshold) - ! Find inverse ai = (2*(P2-P)+I)^-1 - !call prg_conjgrad(y_bml, ai_bml, I_bml, aux_bml, d_bml, w_bml, 0.01_dp, threshold) - call prg_newtonschulz(y_bml, ai_bml, d_bml, w_bml, aux_bml, I_bml, tol, threshold) - call bml_multiply(ai_bml, p2_bml, p_bml, 1.0_dp, 0.0_dp, threshold) - call bml_copy(ai_bml, Inv_bml(i)) ! Save inverses for use in perturbation response calculation - enddo + do i = 1, nsteps + call bml_multiply_x2(p_bml, p2_bml, threshold, trace) + ! Y = 2*(P2-P) + I + call bml_copy(p2_bml, y_bml) + call bml_add(y_bml, p_bml, 1.0_dp, -1.0_dp, threshold) + call bml_scale_add_identity(y_bml, 2.0_dp, 1.0_dp, threshold) + ! Find inverse ai = (2*(P2-P)+I)^-1 + !call prg_conjgrad(y_bml, ai_bml, I_bml, aux_bml, d_bml, w_bml, 0.01_dp, threshold) + call prg_newtonschulz(y_bml, ai_bml, d_bml, w_bml, aux_bml, I_bml, tol, threshold) + call bml_multiply(ai_bml, p2_bml, p_bml, 1.0_dp, 0.0_dp, threshold) + call bml_copy(ai_bml, Inv_bml(i)) ! Save inverses for use in perturbation response calculation + enddo trdPdmu = bml_trace(p_bml) trP0 = trdPdmu @@ -129,40 +129,44 @@ subroutine prg_implicit_fermi_save_inverse(Inv_bml, h_bml, p_bml, nsteps, nocc, occErr = abs(trP0 - nocc) write(*,*) 'occerr =', nocc-trP0 - ! If occupation error is too large, do bisection method - if (occerr > 10.0_dp) then - if (nocc-trP0 < 0.0_dp) then - if (prev .eq. 1) then - alpha = alpha/2 - endif - prev = -1 - mu = mu - alpha - else - if (prev .eq. -1) then - alpha = alpha/2 - endif - prev = 1 - mu = mu + alpha - endif - ! Otherwise do Newton + ! If occupation error is too large, do bisection method + if (occerr > 10.0_dp) then + if (nocc-trP0 < 0.0_dp) then + if (prev .eq. 1) then + alpha = alpha/2 + endif + prev = -1 + mu = mu - alpha + else + if (prev .eq. -1) then + alpha = alpha/2 + endif + prev = 1 + mu = mu + alpha + endif + ! Otherwise do Newton else if (occErr .gt. occErrLimit) then mustep = (nocc -trP0)/trdPdmu if (abs(mustep) > 1.0) then mustep = 0.1_dp*mustep - end if - mu = mu + mustep + end if + mu = mu + mustep muadj = 1 end if enddo - ! Adjusting the occupation sometimes causes the perturbation calculation to not converge. - ! For now we recompute the DM one extra time if mu was adjusted. + + if (iter .ge. 50) then + write(*,*) 'Could not converge chemical potential in prg_impplicit_fermi_save_inverse' + end if + ! Adjusting the occupation sometimes causes the perturbation calculation to not converge. + ! For now we recompute the DM one extra time if mu was adjusted. !if (muadj .eq. 1) then - ! Adjust occupation - ! call bml_copy(p_bml, d_bml) - ! call bml_scale_add_identity(d_bml, -1.0_dp, 1.0_dp, threshold) - ! call bml_multiply(p_bml, d_bml, w_bml, 1.0_dp, 0.0_dp, threshold) - ! ofactor = ((nocc - trP0)/trdPdmu) * beta - ! call bml_add(p_bml, w_bml, 1.0_dp, ofactor, threshold) + ! Adjust occupation + ! call bml_copy(p_bml, d_bml) + ! call bml_scale_add_identity(d_bml, -1.0_dp, 1.0_dp, threshold) + ! call bml_multiply(p_bml, d_bml, w_bml, 1.0_dp, 0.0_dp, threshold) + ! ofactor = ((nocc - trP0)/trdPdmu) * beta + ! call bml_add(p_bml, w_bml, 1.0_dp, ofactor, threshold) !end if occiter = occiter + iter call bml_scale(2.0_dp,p_bml) @@ -177,7 +181,7 @@ subroutine prg_implicit_fermi_save_inverse(Inv_bml, h_bml, p_bml, nsteps, nocc, call bml_deallocate(I_bml) end subroutine prg_implicit_fermi_save_inverse - + !> Recursive Implicit Fermi Dirac for finite temperature. !! \param h_bml Input Hamiltonian matrix. !! \param p_bml Output density matrix. @@ -216,17 +220,12 @@ subroutine prg_implicit_fermi(h_bml, p_bml, nsteps, k, nocc, & N = bml_get_N(h_bml) M = bml_get_M(h_bml) - call bml_print_matrix("h_bml",h_bml,1,10,1,10) - call bml_print_matrix("p_bml",p_bml,1,10,1,10) - write(*,*) nsteps, k, nocc, & - mu, beta, method, osteps, occErrLimit, threshold, tol - !stop allocate(trace(2)) call bml_zero_matrix(bml_type, bml_element_real, dp, N, M, p2_bml) call bml_zero_matrix(bml_type, bml_element_real, dp, N, M, d_bml) call bml_zero_matrix(bml_type, bml_element_real, dp, N, M, w_bml) call bml_zero_matrix(bml_type, bml_element_real, dp, N, M, y_bml) - if (k .gt. 2) then + if (k .ge. 2) then call bml_zero_matrix(bml_type, bml_element_real, dp, N, M, aux1_bml) call bml_zero_matrix(bml_type, bml_element_real, dp, N, M, aux2_bml) endif @@ -292,8 +291,10 @@ subroutine prg_implicit_fermi(h_bml, p_bml, nsteps, k, nocc, & trP0 = trdPdmu trdPdmu = trdPdmu - bml_sum_squares(p_bml) ! sum p(i,j)**2 trdPdmu = beta * trdPdmu - mu = mu + (nocc - trP0)/trdPdmu occErr = abs(trP0 - nocc) + if (occErr .gt. occErrLimit) then + mu = mu + (nocc - trP0)/trdPdmu + end if write(*,*) "mu =", mu enddo @@ -304,7 +305,7 @@ subroutine prg_implicit_fermi(h_bml, p_bml, nsteps, k, nocc, & call bml_multiply(p_bml, d_bml, w_bml, 1.0_dp, 0.0_dp, threshold) ofactor = ((nocc - trP0)/trdPdmu) * beta - call bml_add(p_bml, w_bml, 1.0_dp, ofactor, threshold) + !call bml_add(p_bml, w_bml, 1.0_dp, ofactor, threshold) !call bml_print_matrix("P adjusted occupation",p_bml,0,10,0,10) deallocate(trace) @@ -313,7 +314,7 @@ subroutine prg_implicit_fermi(h_bml, p_bml, nsteps, k, nocc, & call bml_deallocate(w_bml) call bml_deallocate(d_bml) call bml_deallocate(y_bml) - if (k .gt. 2) then + if (k .ge. 2) then call bml_deallocate(aux1_bml) call bml_deallocate(aux2_bml) endif @@ -438,7 +439,7 @@ subroutine prg_implicit_fermi_first_order_response(H0_bml, H1_bml, P0_bml, P1_bm integer, intent(in) :: nsteps type(bml_matrix_t) :: B_bml, C_bml, C0_bml character(20) :: bml_type - real(dp) :: p1_trace, dPdmu_trace, p1B_trace, mu1B, cnst + real(dp) :: p1_trace, dPdmu_trace, p1B_trace, mu1B, cnst integer :: N, M, i, j, k bml_type = bml_get_type(H0_bml) @@ -451,46 +452,46 @@ subroutine prg_implicit_fermi_first_order_response(H0_bml, H1_bml, P0_bml, P1_bm cnst = beta/(2**(2+nsteps)) - ! P0 = 0.5*II - cnst*(H0-mu0*II) - call bml_copy(H0_bml, P0_bml) - call prg_normalize_implicit_fermi(P0_bml, cnst, mu0) - - ! P1 = - cnst*H1 - call bml_copy(H1_bml, P1_bml) - call bml_scale(-1.0_dp*cnst, P1_bml) - do i = 1, nsteps - - ! Calculate coefficient matrices - ! C0 = P0^2 - call bml_multiply(P0_bml, P0_bml, C0_bml, 1.0_dp, 0.0_dp, threshold) - ! C = P0*P1+P1*P0, B = 2(P1 - C) - call bml_multiply(P0_bml, P1_bml, C_bml, 1.0_dp, 0.0_dp, threshold) - call bml_multiply(P1_bml, P0_bml, C_bml, 1.0_dp, 1.0_dp, threshold) - call bml_copy(P1_bml, B_bml) - call bml_add(B_bml, C_bml, 2.0_dp, -2.0_dp, threshold) - ! Get next P0 - call bml_multiply(Inv_bml(i), C0_bml, P0_bml, 1.0_dp, 0.0_dp, threshold) - ! Get next P1 - ! C = P0*P1+P1*P0 + 2(P1 -P0*P1-P1*P0)*P0(i+1) - call bml_multiply(B_bml, P0_bml, C_bml, 1.0_dp, 1.0_dp, threshold) - call bml_multiply(Inv_bml(i), C_bml, P1_bml, 1.0_dp, 0.0_dp, threshold) - enddo + ! P0 = 0.5*II - cnst*(H0-mu0*II) + call bml_copy(H0_bml, P0_bml) + call prg_normalize_implicit_fermi(P0_bml, cnst, mu0) + + ! P1 = - cnst*H1 + call bml_copy(H1_bml, P1_bml) + call bml_scale(-1.0_dp*cnst, P1_bml) + do i = 1, nsteps + + ! Calculate coefficient matrices + ! C0 = P0^2 + call bml_multiply(P0_bml, P0_bml, C0_bml, 1.0_dp, 0.0_dp, threshold) + ! C = P0*P1+P1*P0, B = 2(P1 - C) + call bml_multiply(P0_bml, P1_bml, C_bml, 1.0_dp, 0.0_dp, threshold) + call bml_multiply(P1_bml, P0_bml, C_bml, 1.0_dp, 1.0_dp, threshold) + call bml_copy(P1_bml, B_bml) + call bml_add(B_bml, C_bml, 2.0_dp, -2.0_dp, threshold) + ! Get next P0 + call bml_multiply(Inv_bml(i), C0_bml, P0_bml, 1.0_dp, 0.0_dp, threshold) + ! Get next P1 + ! C = P0*P1+P1*P0 + 2(P1 -P0*P1-P1*P0)*P0(i+1) + call bml_multiply(B_bml, P0_bml, C_bml, 1.0_dp, 1.0_dp, threshold) + call bml_multiply(Inv_bml(i), C_bml, P1_bml, 1.0_dp, 0.0_dp, threshold) + enddo - ! dPdmu = beta*P0(I-P0) - call bml_copy(P0_bml, B_bml) - call bml_scale_add_identity(B_bml, -1.0_dp, 1.0_dp, threshold) - call bml_multiply(P0_bml, B_bml, C_bml, 1.0_dp, 0.0_dp, threshold) - call bml_scale(beta,C_bml) - dPdmu_trace = bml_trace(C_bml) - p1_trace = bml_trace(P1_bml) - mu1 = - p1_trace/dPdmu_trace - if (abs(dPdmu_trace) > 1e-8) then - call bml_add(P1_bml,C_bml,1.0_dp,mu1,threshold) - endif + ! dPdmu = beta*P0(I-P0) + call bml_copy(P0_bml, B_bml) + call bml_scale_add_identity(B_bml, -1.0_dp, 1.0_dp, threshold) + call bml_multiply(P0_bml, B_bml, C_bml, 1.0_dp, 0.0_dp, threshold) + call bml_scale(beta,C_bml) + dPdmu_trace = bml_trace(C_bml) + p1_trace = bml_trace(P1_bml) + mu1 = - p1_trace/dPdmu_trace + if (abs(dPdmu_trace) > 1e-8) then + call bml_add(P1_bml,C_bml,1.0_dp,mu1,threshold) + endif - call bml_deallocate(B_bml) - call bml_deallocate(C_bml) - call bml_deallocate(C0_bml) + call bml_deallocate(B_bml) + call bml_deallocate(C_bml) + call bml_deallocate(C0_bml) end subroutine prg_implicit_fermi_first_order_response @@ -513,7 +514,7 @@ end subroutine prg_implicit_fermi_first_order_response !! See \cite{niklasson2015} subroutine prg_implicit_fermi_response(H0_bml, H1_bml, H2_bml, H3_bml, P0_bml, P1_bml, P2_bml, P3_bml, & nsteps, mu0, mu, beta, nocc, occ_tol, lin_tol, order, threshold) - + implicit none type(bml_matrix_t), intent(in) :: H0_bml, H1_bml, H2_bml, H3_bml @@ -868,7 +869,8 @@ subroutine prg_newtonschulz(a_bml, ai_bml, r_bml, tmp_bml, d_bml, I_bml, tol, th N2 = N*N scaled_tol = tol*N do while(err > scaled_tol) - !write(*,*) 'iter = ', i + !write(*,*) 'iter = ', i + !write(*,*) 'ns error =', err call bml_copy(ai_bml, tmp_bml) call bml_multiply(a_bml, ai_bml, r_bml, 1.0_dp, 0.0_dp, threshold) call bml_scale_add_identity(r_bml, -1.0_dp, 1.0_dp, threshold) @@ -876,12 +878,12 @@ subroutine prg_newtonschulz(a_bml, ai_bml, r_bml, tmp_bml, d_bml, I_bml, tol, th err = bml_fnorm(r_bml) !write(*,*) "err = ", err !write(*,*) "prev_err = ", prev_err - if (10*prev_err < err) then + if (10*prev_err < err) then write(*,*) 'NS did not converge, calling conjugate gradient' call prg_conjgrad(a_bml, ai_bml, I_bml, r_bml, tmp_bml, d_bml, 0.0001_dp, threshold) - else + else call bml_multiply(tmp_bml, r_bml, ai_bml, 1.0_dp, 1.0_dp, threshold) - endif + endif i = i + 1 enddo !write(*,*) "Number of NS iterations:", i @@ -1004,7 +1006,7 @@ subroutine prg_conjgrad(A_bml, p_bml, p2_bml, tmp_bml, d_bml, w_bml, cg_tol, thr call bml_add(tmp_bml, w_bml, 1.0_dp, -alpha, threshold) r_norm_old = r_norm_new r_norm_new = bml_sum_squares(tmp_bml) - if (k .gt. 500) then + if (k .gt. 50) then write(*,*) "Conjugate gradient is not converging" stop endif @@ -1124,8 +1126,10 @@ subroutine prg_test_density_matrix(ham_bml, p_bml, beta, mu, nocc, osteps, occEr trP0 = trdPdmu trdPdmu = trdPdmu - bml_sum_squares(p_bml) ! sum p(i,j)**2 trdPdmu = beta * trdPdmu - mu = mu + (nocc - trP0)/trdPdmu occErr = abs(trP0 - nocc) + if (occErr .gt. occErrLimit) then + mu = mu + (nocc - trP0)/trdPdmu + end if !write(*,*) "mu = ", mu enddo @@ -1136,7 +1140,7 @@ subroutine prg_test_density_matrix(ham_bml, p_bml, beta, mu, nocc, osteps, occEr call bml_multiply(p_bml, aux_bml, aux1_bml, 1.0_dp, 0.0_dp, threshold) ofactor = ((nocc - trP0)/trdPdmu) * beta - call bml_add(p_bml, aux1_bml, 1.0_dp, ofactor, threshold) + !call bml_add(p_bml, aux1_bml, 1.0_dp, ofactor, threshold) !call bml_print_matrix("Diagonalization - Adjusted occupation",p_bml,0,10,0,10) call bml_deallocate(eigenvectors_bml) diff --git a/src/prg_timer_mod.F90 b/src/prg_timer_mod.F90 index 204a90ca..c480b0d0 100644 --- a/src/prg_timer_mod.F90 +++ b/src/prg_timer_mod.F90 @@ -1,17 +1,17 @@ !> The timer module. !! \ingroup PROGRESS !! -!! \brief Sets up timers that can be used to time other routines. +!! \brief Sets up timers that can be used to time other routines. !! -!! Example use of dynamic timing: +!! Example use of dynamic timing: !! !! call timer_prg_init() -!! +!! !! call prg_timer_start(dyn_timer,"timer_tag") !! -!! .... code lines ... +!! .... code lines ... !! -!! call prg_timer_stop(dyn_timer,1) +!! call prg_timer_stop(dyn_timer,1) !! !! !! This will write the time it takes to execute "code lines" and it will name it "timer_tag" @@ -53,41 +53,41 @@ module prg_timer_mod !> Timer status type type timer_status_t - !> Timer name - character(LEN=20) :: tname + !> Timer name + character(LEN=20) :: tname - !> Start time - integer :: tstart + !> Start time + integer :: tstart - !> Current total time - integer :: ttotal + !> Current total time + integer :: ttotal - !> Current call count - integer :: tcount + !> Current call count + integer :: tcount - !> Rank with min value - integer :: minRank + !> Rank with min value + integer :: minRank - !> Rank with max value - integer :: maxRank + !> Rank with max value + integer :: maxRank - !> Sum time - total time in secs - real(dp):: tsum + !> Sum time - total time in secs + real(dp):: tsum - !> Minimum value over all ranks - real(dp) :: minValue + !> Minimum value over all ranks + real(dp) :: minValue - !> Maximum value over all ranks - real(dp) :: maxValue + !> Maximum value over all ranks + real(dp) :: maxValue - !> Average value over all ranks - real(dp) :: tavg + !> Average value over all ranks + real(dp) :: tavg - !> Stdev across all ranks - real(dp) :: tstdev + !> Stdev across all ranks + real(dp) :: tstdev - !> Percent of time across all timers - real(dp) :: tpercent + !> Percent of time across all timers + real(dp) :: tpercent end type timer_status_t @@ -114,7 +114,7 @@ module prg_timer_mod ! ! ptimer(loop_timer)%tname = "Loop" ! ptimer(sp2_timer)%tname = " SP2" - ! ptimer(genx_timer)%tname = " GenX" + ! ptimer(genx_timer)%tname = " GenX" ! ptimer(new_timer)%tname = " New" ! @@ -133,7 +133,7 @@ subroutine timer_prg_init() integer :: i ! Increment when adding a new timer - num_timers = 24 + num_timers = 24 allocate(ptimer(num_timers)) @@ -178,19 +178,19 @@ subroutine timer_prg_init() ptimer(suball_timer)%tname = " SubAll" ptimer(bmult_timer)%tname = " BMult" ptimer(badd_timer)%tname = " BAdd" - ptimer(dyn_timer)%tname = " " !Reserved for dynamic timing - ptimer(mdloop_timer)%tname = "MDLoop" - ptimer(buildz_timer)%tname = " BuildZ" - ptimer(realcoul_timer)%tname = " RealCoul" - ptimer(recipcoul_timer)%tname = " RecipCoul" - ptimer(pairpot_timer)%tname = " PairPot" - ptimer(halfverlet_timer)%tname = " HalfVerlet" - ptimer(pos_timer)%tname = " Pos" - ptimer(nlist_timer)%tname = " NList" + ptimer(dyn_timer)%tname = " " !Reserved for dynamic timing + ptimer(mdloop_timer)%tname = "MDLoop" + ptimer(buildz_timer)%tname = " BuildZ" + ptimer(realcoul_timer)%tname = " RealCoul" + ptimer(recipcoul_timer)%tname = " RecipCoul" + ptimer(pairpot_timer)%tname = " PairPot" + ptimer(halfverlet_timer)%tname = " HalfVerlet" + ptimer(pos_timer)%tname = " Pos" + ptimer(nlist_timer)%tname = " NList" do i = 1, num_timers - ptimer(i)%ttotal = 0 - ptimer(i)%tcount = 0 + ptimer(i)%ttotal = 0 + ptimer(i)%tcount = 0 end do end subroutine timer_prg_init @@ -214,10 +214,10 @@ end subroutine prg_timer_shutdown subroutine prg_timer_start(itimer,tag) integer, intent(in) :: itimer - character(len=*), intent(in), optional :: tag + character(len=*), intent(in), optional :: tag - if(present(tag))then - ptimer(itimer)%tname = tag + if(present(tag))then + ptimer(itimer)%tname = tag endif call system_clock(tstart_clock, tclock_rate, tclock_max) @@ -238,9 +238,9 @@ subroutine prg_timer_stop(itimer,verbose) call system_clock(tstop_clock, tclock_rate, tclock_max) tprg_delta = tstop_clock - ptimer(itimer)%tstart if(present(verbose))then - if(verbose.gt.0)then - write(*,*)"Time for "//trim(ptimer(itimer)%tname)//" = "//to_string(tprg_delta)//" ms" - endif + if(verbose.gt.0)then + write(*,*)"Time for "//trim(ptimer(itimer)%tname)//" = "//to_string(tprg_delta)//" ms" + endif endif ptimer(itimer)%ttotal = ptimer(itimer)%ttotal + tprg_delta ptimer(itimer)%tcount = ptimer(itimer)%tcount + 1 @@ -266,12 +266,12 @@ subroutine prg_timer_collect() !! Determine average of each timer across ranks do i = 1, num_timers - sendBuf(i) = float(ptimer(i)%ttotal)/float(tclock_rate) + sendBuf(i) = float(ptimer(i)%ttotal)/float(tclock_rate) enddo call sumRealParallel(sendBuf, recvBuf, num_timers); do i = 1, num_timers - ptimer(i)%tavg = recvBuf(i) / rranks + ptimer(i)%tavg = recvBuf(i) / rranks enddo !! Determine min and max across ranks and which rank @@ -279,18 +279,18 @@ subroutine prg_timer_collect() allocate(reduceRecvBuf(num_timers)) do i = 1, num_timers - reduceSendBuf(i)%val = float(ptimer(i)%ttotal)/float(tclock_rate) - reduceSendBuf(i)%rank = getMyRank() + reduceSendBuf(i)%val = float(ptimer(i)%ttotal)/float(tclock_rate) + reduceSendBuf(i)%rank = getMyRank() enddo call minRankRealParallel(reduceSendBuf, reduceRecvBuf, num_timers); do i = 1, num_timers - ptimer(i)%minValue = reduceRecvBuf(i)%val - ptimer(i)%minRank = reduceRecvBuf(i)%rank + ptimer(i)%minValue = reduceRecvBuf(i)%val + ptimer(i)%minRank = reduceRecvBuf(i)%rank enddo call maxRankRealParallel(reduceSendBuf, reduceRecvBuf, num_timers); do i = 1, num_timers - ptimer(i)%maxValue = reduceRecvBuf(i)%val - ptimer(i)%maxRank = reduceRecvBuf(i)%rank + ptimer(i)%maxValue = reduceRecvBuf(i)%val + ptimer(i)%maxRank = reduceRecvBuf(i)%rank enddo deallocate(reduceSendBuf) @@ -298,12 +298,12 @@ subroutine prg_timer_collect() !! Determine standard deviation do i = 1, num_timers - temp = float(ptimer(i)%ttotal)/float(tclock_rate) - ptimer(i)%tavg - sendBuf(i) = temp * temp; + temp = float(ptimer(i)%ttotal)/float(tclock_rate) - ptimer(i)%tavg + sendBuf(i) = temp * temp; enddo call sumRealParallel(sendBuf, recvBuf, num_timers); do i = 1, num_timers - ptimer(i)%tstdev = sqrt(recvBuf(i) / rranks) + ptimer(i)%tstdev = sqrt(recvBuf(i) / rranks) enddo deallocate(sendBuf) @@ -323,35 +323,35 @@ subroutine prg_timer_results() ! Print timer results if (printRank() .eq. 1) then - write(*,*) "" - write(*,*) "Timings for Rank ", getMyRank() - write(*,*) "Timer # Calls Avg/Call (s) Total (s) % Time" - write(*,*) "" - - do i = 1, num_timers - if (ptimer(i)%tcount .gt. 0) then - !! ptimer(i)%tavg = (float(ptimer(i)%ttotal)/float(tclock_rate))/float(ptimer(i)%tcount) - ptimer(i)%tsum = float(ptimer(i)%ttotal)/float(tclock_rate) - ptimer(i)%tpercent = (ptimer(i)%tsum / ptimer(1)%tsum) * 100.0 - write(*,10) ptimer(i)%tname, ptimer(i)%tcount, ptimer(i)%tsum/float(ptimer(i)%tcount), ptimer(i)%tsum, ptimer(i)%tpercent -10 format(A23, I6, 3G16.6) - end if - end do - - write(*,*) "" - write(*,*) "Timing Statistics Across ", getNRanks(), " Ranks:" - write(*,*) "Timer Rank: Min(s) Rank: Max(s) Avg(s) Stdev(s)" - write(*,*) - - do i = 1, num_timers - if (ptimer(i)%tcount > 0) then - write(*, 20) ptimer(i)%tname, & - ptimer(i)%minRank, ptimer(i)%minValue, & - ptimer(i)%maxRank, ptimer(i)%maxValue, & - ptimer(i)%tavg, ptimer(i)%tstdev -20 format(A23,2X,I4,G16.6,I4,3G16.6) - endif - enddo + write(*,*) "" + write(*,*) "Timings for Rank ", getMyRank() + write(*,*) "Timer # Calls Avg/Call (s) Total (s) % Time" + write(*,*) "" + + do i = 1, num_timers + if (ptimer(i)%tcount .gt. 0) then + !! ptimer(i)%tavg = (float(ptimer(i)%ttotal)/float(tclock_rate))/float(ptimer(i)%tcount) + ptimer(i)%tsum = float(ptimer(i)%ttotal)/float(tclock_rate) + ptimer(i)%tpercent = (ptimer(i)%tsum / ptimer(1)%tsum) * 100.0 + write(*,10) ptimer(i)%tname, ptimer(i)%tcount, ptimer(i)%tsum/float(ptimer(i)%tcount), ptimer(i)%tsum, ptimer(i)%tpercent +10 format(A23, I6, 3G16.6) + end if + end do + + write(*,*) "" + write(*,*) "Timing Statistics Across ", getNRanks(), " Ranks:" + write(*,*) "Timer Rank: Min(s) Rank: Max(s) Avg(s) Stdev(s)" + write(*,*) + + do i = 1, num_timers + if (ptimer(i)%tcount > 0) then + write(*, 20) ptimer(i)%tname, & + ptimer(i)%minRank, ptimer(i)%minValue, & + ptimer(i)%maxRank, ptimer(i)%maxValue, & + ptimer(i)%tavg, ptimer(i)%tstdev +20 format(A23,2X,I4,G16.6,I4,3G16.6) + endif + enddo endif end subroutine prg_timer_results @@ -372,7 +372,7 @@ subroutine prg_print_date_and_time(tag) implicit none character(len=*), intent(in) :: tag - character(2) :: monthchar, daychar,hourchar,minchar,secchar + character(2) :: monthchar, daychar,hourchar,minchar,secchar integer :: sec, mins, hour, day, month, year integer :: timevector(8) @@ -394,14 +394,14 @@ function int2char(ival) implicit none - integer, intent(in) :: ival + integer, intent(in) :: ival character(2) :: int2char, myintchar if ((ival/10) .lt. 1) then - write(myintchar,'(I2)') ival - myintchar="0"//trim(adjustl(myintchar)) + write(myintchar,'(I2)') ival + myintchar="0"//trim(adjustl(myintchar)) else - write(myintchar,'(I2)') ival + write(myintchar,'(I2)') ival endif int2char = myintchar diff --git a/src/prg_xlbokernel_mod.F90 b/src/prg_xlbokernel_mod.F90 index 54f4ecf1..a19cb5b2 100644 --- a/src/prg_xlbokernel_mod.F90 +++ b/src/prg_xlbokernel_mod.F90 @@ -1,7 +1,7 @@ !> Pre-conditioned O(N) calculation of the kernel for XL-BOMD. !! \ingroup PROGRESS -!! \brief Here are subroutines implementing Niklasson's scheme for -!! low-rank, Krylov subspace approximation of the kernel. +!! \brief Here are subroutines implementing Niklasson's scheme for +!! low-rank, Krylov subspace approximation of the kernel. module prg_xlbokernel_mod use omp_lib @@ -11,7 +11,7 @@ module prg_xlbokernel_mod use prg_timer_mod use prg_parallel_mod use prg_ewald_mod - use prg_implicit_fermi_mod + use prg_implicit_fermi_mod implicit none @@ -27,809 +27,810 @@ module prg_xlbokernel_mod contains -subroutine Invert(A,AI,N) - -implicit none -integer, parameter :: PREC = 8 -integer, intent(in) :: N -real(PREC), intent(in) :: A(N,N) -real(PREC), intent(out) :: AI(N,N) -real(PREC) :: WORK(N+N*N)!, C(N,N) -integer :: LDA, LWORK, M, INFO, IPIV(N) -integer :: I,J,K - -external DGETRF -external DGETRI - -AI = A -LDA = N -M = N -LWORK = N+N*N - -call DGETRF(M, N, AI, LDA, IPIV, INFO) -call DGETRI(N, AI, N, IPIV, WORK, LWORK, INFO) - -end subroutine Invert - -!> Compute low rank approximation of (K0*J)^(-1)*K0*(q[n]-n)(for LATTE) -!! \param KRes The low rank approximation -!! \param KK0_bml The pre-conditioner K0. -!! \param Res The residual q[n]-n -!! \param FelTol Relative error tolerance for approximation -!! \param L Number of vectors used. -!! \param LMAX Maximum nr of vectors to use. -!! \param NUMRANK Nr of vectors to use. -!! \param HO_bml, Orthogonalized Hamiltonian matrix. -!! \param mu The chemical potiential. -!! \param beta Scaled inverse temperature. -!! \param RXYZ Nuclear coordinates. -!! \param Box Box dimensions. -!! \param Hubbard_U Hubbard U list. -!! \param Element_Pointer List to keep track of elements. -!! \param Nr_atoms The number of atoms. -!! \param HDIM Hamiltonian matrix dimension. -!! \param Max_Nr_Neigh Max neighbours for Ewald. -!! \param Coulomb_acc Coulomb accuracy. -!! \param nebcoul Neighbour lists. -!! \param totnebcoul Number of neighbours list. -!! \param Hinxlist List to keep track of atomic positions in the Hamiltonian. -!! \param S_bml The S matrix. -!! \param Z_bml, The Z matrix. -!! \param Nocc Occupation. -!! \param Inv_bml, Inverses generated by prg_implicit_fermi_save_inverse. -!! \param DO_bml, D1_bml, H1_bml, Y_bml, X_bml Auxillary matrices. -!! \param m_rec Number of recursion steps. -!! \param threshold Threshold value for matrix truncation. -!! \param Nr_elem Number of elements in Hubbard list. -subroutine prg_kernel_multirank_latte(KRes,KK0_bml,Res,FelTol,L,LMAX,NUMRANK,HO_bml,mu,beta,RXYZ,Box,Hubbard_U,Element_Pointer, & - Nr_atoms,HDIM,Max_Nr_Neigh,Coulomb_acc,nebcoul,totnebcoul,Hinxlist, & - S_bml,Z_bml,Nocc,Inv_bml,H1_bml,X_bml,Y_bml,DO_bml,D1_bml,m_rec,threshold,Nr_elem) - -!! Res = q[n] - n -!! KK0 is preconditioner -!! KRes rank-L approximation of (K0*J)^(-1)*K0*(q[n]-n) with (K0*J)^(-1) as in Eq. (41) in Ref. [*] + subroutine Invert(A,AI,N) + + implicit none + integer, parameter :: PREC = 8 + integer, intent(in) :: N + real(PREC), intent(in) :: A(N,N) + real(PREC), intent(out) :: AI(N,N) + real(PREC) :: WORK(N+N*N)!, C(N,N) + integer :: LDA, LWORK, M, INFO, IPIV(N) + integer :: I,J,K + + external DGETRF + external DGETRI + + AI = A + LDA = N + M = N + LWORK = N+N*N + + call DGETRF(M, N, AI, LDA, IPIV, INFO) + call DGETRI(N, AI, N, IPIV, WORK, LWORK, INFO) + + end subroutine Invert + + !> Compute low rank approximation of (K0*J)^(-1)*K0*(q[n]-n)(for LATTE) + !! \param KRes The low rank approximation + !! \param KK0_bml The pre-conditioner K0. + !! \param Res The residual q[n]-n + !! \param FelTol Relative error tolerance for approximation + !! \param L Number of vectors used. + !! \param LMAX Maximum nr of vectors to use. + !! \param NUMRANK Nr of vectors to use. + !! \param HO_bml, Orthogonalized Hamiltonian matrix. + !! \param mu The chemical potiential. + !! \param beta Scaled inverse temperature. + !! \param RXYZ Nuclear coordinates. + !! \param Box Box dimensions. + !! \param Hubbard_U Hubbard U list. + !! \param Element_Pointer List to keep track of elements. + !! \param Nr_atoms The number of atoms. + !! \param HDIM Hamiltonian matrix dimension. + !! \param Max_Nr_Neigh Max neighbours for Ewald. + !! \param Coulomb_acc Coulomb accuracy. + !! \param nebcoul Neighbour lists. + !! \param totnebcoul Number of neighbours list. + !! \param Hinxlist List to keep track of atomic positions in the Hamiltonian. + !! \param S_bml The S matrix. + !! \param Z_bml, The Z matrix. + !! \param Nocc Occupation. + !! \param Inv_bml, Inverses generated by prg_implicit_fermi_save_inverse. + !! \param DO_bml, D1_bml, H1_bml, Y_bml, X_bml Auxillary matrices. + !! \param m_rec Number of recursion steps. + !! \param threshold Threshold value for matrix truncation. + !! \param Nr_elem Number of elements in Hubbard list. + subroutine prg_kernel_multirank_latte(KRes,KK0_bml,Res,FelTol,L,LMAX,NUMRANK,HO_bml,mu,beta,RXYZ,Box,Hubbard_U,Element_Pointer, & + Nr_atoms,HDIM,Max_Nr_Neigh,Coulomb_acc,nebcoul,totnebcoul,Hinxlist, & + S_bml,Z_bml,Nocc,Inv_bml,H1_bml,X_bml,Y_bml,DO_bml,D1_bml,m_rec,threshold,Nr_elem) + + !! Res = q[n] - n + !! KK0 is preconditioner + !! KRes rank-L approximation of (K0*J)^(-1)*K0*(q[n]-n) with (K0*J)^(-1) as in Eq. (41) in Ref. [*] + + implicit none + integer, intent(in) :: Nr_atoms, HDIM, Nocc,Max_Nr_Neigh,m_rec,Nr_elem,NUMRANK + real(dp), parameter :: ONE = 1.D0, TWO = 2.D0, ZERO = 0.D0 + real(dp), intent(in) :: Coulomb_acc, FelTol, beta, mu + real(dp), intent(in) :: RXYZ(3,Nr_atoms), Box(3,3) + real(dp), intent(in) :: Res(Nr_atoms) + integer, intent(in) :: Hinxlist(HDIM),Element_Pointer(Nr_atoms) + real(dp), intent(in) :: Hubbard_U(Nr_elem) + type(bml_matrix_t), intent(inout) :: HO_bml, S_bml, Z_bml, Inv_bml(m_rec), KK0_bml + real(dp) :: K0Res(Nr_atoms) + type(bml_matrix_t),intent(inout) :: H1_bml, X_bml,Y_bml,DO_bml,D1_bml + real(dp), intent(in) :: threshold + integer, intent(in) :: LMAX + character(20) :: bml_type + integer, intent(in) :: totnebcoul(Nr_atoms), nebcoul(4,Max_Nr_Neigh,Nr_atoms) + real(dp) :: Coulomb_Pot_Real(Nr_atoms), Coulomb_Pot(Nr_atoms) + real(dp) :: Coulomb_Pot_Real_I, Coulomb_Pot_k(Nr_atoms),dq_dv(Nr_atoms) + real(dp) :: Coulomb_Pot_Real_dq_v(Nr_atoms), Coulomb_Pot_dq_v(Nr_atoms) + real(dp) :: dq_v(Nr_atoms) + real(dp), intent(inout) :: KRes(Nr_atoms) + integer :: I,J,K,It,N,MN + integer, intent(out) :: L + real(dp) :: Fel, proj_tmp, start, finish + real(dp) :: vi(Nr_atoms,LMAX), fi(Nr_atoms,LMAX), v(Nr_atoms) + real(dp) :: dr(Nr_Atoms), IdentRes(Nr_atoms) + real(dp), allocatable :: O(:,:),M(:,:),row1(:),row2(:),row_NA(:) + type(bml_matrix_t) :: KK0T_bml, K0Res_bml, Res_bml, ZT_bml, T_bml + + call timer_prg_init() + bml_type = bml_get_type(HO_bml) + N = bml_get_N(HO_bml) + MN = bml_get_M(HO_bml) + allocate(row1(HDIM)); allocate(row2(HDIM)); allocate(row_NA(Nr_atoms)) + call bml_zero_matrix('csr',bml_element_real,dp,Nr_atoms,Nr_atoms,KK0T_bml) + call bml_zero_matrix('csr',bml_element_real,dp,Nr_atoms,Nr_atoms,K0Res_bml) + call bml_zero_matrix('csr',bml_element_real,dp,Nr_atoms,Nr_atoms,Res_bml) + call bml_zero_matrix(bml_type,bml_element_real,dp,N,MN,ZT_bml) + + call bml_transpose(Z_bml,ZT_bml) + + ! K0Res = KK0*Res temporary for matrix-vector multiplication + call bml_set_row(Res_bml,1,Res,1.0_dp*1e-10) + call bml_transpose(KK0_bml,KK0T_bml) + call bml_multiply(Res_bml,KK0T_bml,K0Res_bml,1.0_dp,0.0_dp,1.0_dp*1e-10) + call bml_get_row(K0Res_bml,1,row_NA) + K0Res = row_NA + dr = K0Res + + I = 0 + Fel = 1.D0 + do while ((Fel > FelTol).AND.(I < (LMAX)).AND.(I < NUMRANK)) !! Fel = "Error" in Swedish + I = I + 1 + !write(*,*) 'dr =', norm2(dr) + vi(:,I) = dr/norm2(dr) + do J = 1,I-1 + vi(:,I) = vi(:,I) - dot_product(vi(:,I),vi(:,J))*vi(:,J) !! Orthogonalized v_i as in Eq. (42) Ref. [*] + enddo + vi(:,I) = vi(:,I)/norm2(vi(:,I)) + v(:) = vi(:,I) ! v_i + + ! Compute H1 = H(v) + dq_v = v + call prg_timer_start(1) + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(J,Coulomb_Pot_Real_I) + do J = 1,Nr_atoms + call Ewald_Real_Space_latte(Coulomb_Pot_Real_I,J,RXYZ,Box, & + dq_v,Hubbard_U,Element_Pointer,Nr_atoms,Coulomb_acc,nebcoul,totnebcoul,HDIM,Max_Nr_Neigh,Nr_elem) + Coulomb_Pot_Real(J) = Coulomb_Pot_Real_I + enddo + !$OMP END PARALLEL DO - implicit none - integer, intent(in) :: Nr_atoms, HDIM, Nocc,Max_Nr_Neigh,m_rec,Nr_elem,NUMRANK - real(dp), parameter :: ONE = 1.D0, TWO = 2.D0, ZERO = 0.D0 - real(dp), intent(in) :: Coulomb_acc, FelTol, beta, mu - real(dp), intent(in) :: RXYZ(3,Nr_atoms), Box(3,3) - real(dp), intent(in) :: Res(Nr_atoms) - integer, intent(in) :: Hinxlist(HDIM),Element_Pointer(Nr_atoms) - real(dp), intent(in) :: Hubbard_U(Nr_elem) - type(bml_matrix_t), intent(inout) :: HO_bml, S_bml, Z_bml, Inv_bml(m_rec), KK0_bml - real(dp) :: K0Res(Nr_atoms) - type(bml_matrix_t),intent(inout) :: H1_bml, X_bml,Y_bml,DO_bml,D1_bml - real(dp), intent(in) :: threshold - integer, intent(in) :: LMAX - character(20) :: bml_type - integer, intent(in) :: totnebcoul(Nr_atoms), nebcoul(4,Max_Nr_Neigh,Nr_atoms) - real(dp) :: Coulomb_Pot_Real(Nr_atoms), Coulomb_Pot(Nr_atoms) - real(dp) :: Coulomb_Pot_Real_I, Coulomb_Pot_k(Nr_atoms),dq_dv(Nr_atoms) - real(dp) :: Coulomb_Pot_Real_dq_v(Nr_atoms), Coulomb_Pot_dq_v(Nr_atoms) - real(dp) :: dq_v(Nr_atoms) - real(dp), intent(inout) :: KRes(Nr_atoms) - integer :: I,J,K,It,N,MN - integer, intent(out) :: L - real(dp) :: Fel, proj_tmp, start, finish - real(dp) :: vi(Nr_atoms,LMAX), fi(Nr_atoms,LMAX), v(Nr_atoms) - real(dp) :: dr(Nr_Atoms), IdentRes(Nr_atoms) - real(dp), allocatable :: O(:,:),M(:,:),row1(:),row2(:),row_NA(:) - type(bml_matrix_t) :: KK0T_bml, K0Res_bml, Res_bml, ZT_bml, T_bml - - call timer_prg_init() - bml_type = bml_get_type(HO_bml) - N = bml_get_N(HO_bml) - MN = bml_get_M(HO_bml) - allocate(row1(HDIM)); allocate(row2(HDIM)); allocate(row_NA(Nr_atoms)) - call bml_zero_matrix('csr',bml_element_real,dp,Nr_atoms,Nr_atoms,KK0T_bml) - call bml_zero_matrix('csr',bml_element_real,dp,Nr_atoms,Nr_atoms,K0Res_bml) - call bml_zero_matrix('csr',bml_element_real,dp,Nr_atoms,Nr_atoms,Res_bml) - call bml_zero_matrix(bml_type,bml_element_real,dp,N,MN,ZT_bml) - - call bml_transpose(Z_bml,ZT_bml) - - ! K0Res = KK0*Res temporary for matrix-vector multiplication - call bml_set_row(Res_bml,1,Res,threshold) - call bml_transpose(KK0_bml,KK0T_bml) - call bml_multiply(Res_bml,KK0T_bml,K0Res_bml,1.0_dp,0.0_dp,threshold) - call bml_get_row(K0Res_bml,1,row_NA) - K0Res = row_NA - dr = K0Res - - I = 0 - Fel = 1.D0 - do while ((Fel > FelTol).AND.(I < (LMAX)).AND.(I < NUMRANK)) !! Fel = "Error" in Swedish - I = I + 1 - vi(:,I) = dr/norm2(dr) - do J = 1,I-1 - vi(:,I) = vi(:,I) - dot_product(vi(:,I),vi(:,J))*vi(:,J) !! Orthogonalized v_i as in Eq. (42) Ref. [*] - enddo - vi(:,I) = vi(:,I)/norm2(vi(:,I)) - v(:) = vi(:,I) ! v_i + call Ewald_k_Space_latte(Coulomb_Pot_k,RXYZ,Box,dq_v,Nr_atoms,Coulomb_acc,Max_Nr_Neigh) + Coulomb_Pot_dq_v = Coulomb_Pot_Real+Coulomb_Pot_k + call prg_timer_stop(1,1) - ! Compute H1 = H(v) - dq_v = v - call prg_timer_start(1) - !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(J,Coulomb_Pot_Real_I) - do J = 1,Nr_atoms - call Ewald_Real_Space_latte(Coulomb_Pot_Real_I,J,RXYZ,Box, & - dq_v,Hubbard_U,Element_Pointer,Nr_atoms,Coulomb_acc,nebcoul,totnebcoul,HDIM,Max_Nr_Neigh,Nr_elem) - Coulomb_Pot_Real(J) = Coulomb_Pot_Real_I - enddo - !$OMP END PARALLEL DO - - call Ewald_k_Space_latte(Coulomb_Pot_k,RXYZ,Box,dq_v,Nr_atoms,Coulomb_acc,Max_Nr_Neigh) - Coulomb_Pot_dq_v = Coulomb_Pot_Real+Coulomb_Pot_k - call prg_timer_stop(1,1) + call bml_deallocate(H1_bml) + call bml_zero_matrix(bml_type,bml_element_real,dp,HDIM,MN,H1_bml) - call bml_deallocate(H1_bml) - call bml_zero_matrix(bml_type,bml_element_real,dp,HDIM,MN,H1_bml) + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(It,K) + do It = 1,Nr_atoms-1 + do K = Hinxlist(It)+1,Hinxlist(It+1) + row1(K) = Hubbard_U(Element_Pointer(It))*dq_v(It) + Coulomb_Pot_dq_v(It) + enddo + enddo + !$OMP END PARALLEL DO + do K = Hinxlist(Nr_atoms)+1,HDIM + row1(K) = Hubbard_U(Element_Pointer(Nr_atoms))*dq_v(Nr_atoms) + Coulomb_Pot_dq_v(Nr_atoms) + enddo - !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(It,K) - do It = 1,Nr_atoms-1 - do K = Hinxlist(It)+1,Hinxlist(It+1) - row1(K) = Hubbard_U(Element_Pointer(It))*dq_v(It) + Coulomb_Pot_dq_v(It) - enddo - enddo - !$OMP END PARALLEL DO - do K = Hinxlist(Nr_atoms)+1,HDIM - row1(K) = Hubbard_U(Element_Pointer(Nr_atoms))*dq_v(Nr_atoms) + Coulomb_Pot_dq_v(Nr_atoms) - enddo - - ! H1 = 1/2(S*H1+H1*S) - call bml_set_diagonal(H1_bml,row1,threshold) - call bml_multiply(S_bml,H1_bml,X_bml,1.0_dp,0.0_dp,threshold) - call bml_multiply(H1_bml,S_bml,X_bml,0.5_dp,0.5_dp,threshold) - call bml_copy(X_bml,H1_bml) - - ! H1 = Z^T H1 Z - call bml_multiply(ZT_bml,H1_bml,X_bml,1.0_dp,0.0_dp,threshold) - call bml_multiply(X_bml,Z_bml,H1_bml,1.0_dp,0.0_dp,threshold) - - ! Compute D1 = F_FD(HO_bml + eps*H1_bml)/eps at eps = 0 - call prg_implicit_fermi_first_order_response(HO_bml,H1_bml,DO_bml,D1_bml,Inv_bml, & - m_rec, mu, beta, real(nocc,dp), threshold) - - ! D1 = Z D1 Z^T - call bml_multiply(Z_bml,D1_bml,Y_bml,2.0_dp,0.0_dp,threshold) - call bml_multiply(Y_bml,ZT_bml,D1_bml,1.0_dp,0.0_dp,threshold) - - ! Compute dq/dv - call bml_multiply(D1_bml,S_bml,X_bml, 1.0_dp,0.0_dp,threshold) - call bml_get_diagonal(X_bml,row1) - !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(It,K) - do It = 1, Nr_atoms-1 - dq_dv(It) = 0 - do K = Hinxlist(It)+1,Hinxlist(It+1) + ! H1 = 1/2(S*H1+H1*S) + call bml_set_diagonal(H1_bml,row1,threshold) + call bml_multiply(S_bml,H1_bml,X_bml,1.0_dp,0.0_dp,threshold) + call bml_multiply(H1_bml,S_bml,X_bml,0.5_dp,0.5_dp,threshold) + call bml_copy(X_bml,H1_bml) + + ! H1 = Z^T H1 Z + call bml_multiply(ZT_bml,H1_bml,X_bml,1.0_dp,0.0_dp,threshold) + call bml_multiply(X_bml,Z_bml,H1_bml,1.0_dp,0.0_dp,threshold) + + ! Compute D1 = F_FD(HO_bml + eps*H1_bml)/eps at eps = 0 + call prg_implicit_fermi_first_order_response(HO_bml,H1_bml,DO_bml,D1_bml,Inv_bml, & + m_rec, mu, beta, real(nocc,dp), threshold) + + ! D1 = Z D1 Z^T + call bml_multiply(Z_bml,D1_bml,Y_bml,2.0_dp,0.0_dp,threshold) + call bml_multiply(Y_bml,ZT_bml,D1_bml,1.0_dp,0.0_dp,threshold) + + ! Compute dq/dv + call bml_multiply(D1_bml,S_bml,X_bml, 1.0_dp,0.0_dp,threshold) + call bml_get_diagonal(X_bml,row1) + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(It,K) + do It = 1, Nr_atoms-1 + dq_dv(It) = 0 + do K = Hinxlist(It)+1,Hinxlist(It+1) dq_dv(It) = dq_dv(It) + row1(K) - enddo - enddo - !$OMP END PARALLEL DO - dq_dv(Nr_atoms) = 0 - do K = Hinxlist(Nr_atoms)+1,HDIM - dq_dv(Nr_atoms) = dq_dv(Nr_atoms) + row1(K) + enddo + enddo + !$OMP END PARALLEL DO + dq_dv(Nr_atoms) = 0 + do K = Hinxlist(Nr_atoms)+1,HDIM + dq_dv(Nr_atoms) = dq_dv(Nr_atoms) + row1(K) + enddo + + dr = dq_dv - v + ! fi = K0(dq_dv - v) + call bml_set_row(Res_bml,1,dr,1.0_dp*1e-10) + call bml_multiply(Res_bml,KK0T_bml,K0Res_bml,1.0_dp,0.0_dp,1.0_dp*1e-10) + call bml_get_row(K0Res_bml,1,row_NA) + dr = row_NA + fi(:,I) = dr + + L = I + allocate(O(L,L), M(L,L)) + do K = 1,L + do J = 1,L + O(K,J) = dot_product(fi(:,K),fi(:,J)) ! O_KJ = < fv_i(K) | fv_i(J) > see below Eq. (31) + enddo + enddo + call Invert(O,M,L) ! M = O^(-1) + IdentRes = 0.D0*K0Res + KRes = 0.D0 + do K = 1,L + do J = 1,L + proj_tmp = M(K,J)*dot_product(fi(:,J),K0Res) + IdentRes = IdentRes + proj_tmp*fi(:,K) + KRes = KRes + proj_tmp*vi(:,K) !! KRes becomes the rank-L approximate of (K0*J)^(-1)*K0*(q[n]-n) + enddo + enddo + Fel = norm2(IdentRes-K0Res)/norm2(IdentRes) !! RELATIVE RESIDUAL ERROR ESTIMATE Eq. (48) Ref. [*] + write(*,*) '# I, L, Fel = ',I,L,Fel !! Fel goes down with L + deallocate(O, M) + enddo - dr = dq_dv - v - ! fi = K0(dq_dv - v) - call bml_set_row(Res_bml,1,dr,threshold) - call bml_multiply(Res_bml,KK0T_bml,K0Res_bml,1.0_dp,0.0_dp,threshold) + deallocate(row1);deallocate(row2);deallocate(row_NA) + call bml_deallocate(KK0T_bml) + call bml_deallocate(K0Res_bml) + call bml_deallocate(Res_bml) + call bml_deallocate(ZT_bml) + call prg_timer_shutdown() + + end subroutine prg_kernel_multirank_latte + + ! Above routine but for development code + subroutine prg_kernel_multirank(KRes,KK0_bml,Res,FelTol,L,LMAX,HO_bml,mu,beta,RX,RY,RZ,LBox,Hubbard_U,Element_Type, & + Nr_atoms,HDIM,Max_Nr_Neigh,Coulomb_acc,TIMERATIO,nnRx,nnRy,nnRz,nrnnlist,nnType,H_INDEX_START,H_INDEX_END, & + S_bml,Z_bml,Nocc,Znuc,Inv_bml,H1_bml,X_bml,Y_bml,DO_bml,D1_bml,m_rec,threshold) + + !! Res = q[n] - n + !! KK0 is preconditioner + !! KRes rank-L approximation of (K0*J)^(-1)*K0*(q[n]-n) with (K0*J)^(-1) as in Eq. (41) in Ref. [*] + + implicit none + integer, intent(in) :: Nr_atoms, HDIM, Nocc,Max_Nr_Neigh,m_rec + real(dp), parameter :: ONE = 1.D0, TWO = 2.D0, ZERO = 0.D0 + real(dp), intent(in) :: Coulomb_acc, TIMERATIO, FelTol, beta, mu + real(dp), intent(in) :: RX(Nr_atoms), RY(Nr_atoms), RZ(Nr_atoms), LBox(3) + real(dp), intent(in) :: Res(Nr_atoms) + integer, intent(in) :: H_INDEX_START(Nr_atoms), H_INDEX_END(Nr_atoms) + real(dp), intent(in) :: Znuc(Nr_atoms), Hubbard_U(Nr_atoms) + type(bml_matrix_t), intent(in) :: HO_bml, S_bml, Z_bml, Inv_bml(m_rec), KK0_bml + real(dp) :: K0Res(Nr_atoms) + type(bml_matrix_t),intent(inout) :: H1_bml, X_bml,Y_bml,DO_bml,D1_bml + real(dp), intent(in) :: threshold + integer, intent(in) :: LMAX + character(10), intent(in) :: Element_Type(Nr_atoms) + integer, intent(in) :: nrnnlist(Nr_atoms), nnType(Nr_atoms,Max_Nr_Neigh) + real(dp), intent(in) :: nnRx(Nr_atoms,Max_Nr_Neigh) + real(dp), intent(in) :: nnRy(Nr_atoms,Max_Nr_Neigh), nnRz(Nr_atoms,Max_Nr_Neigh) + real(dp) :: Coulomb_Pot_Real(Nr_atoms), Coulomb_Pot(Nr_atoms) + real(dp) :: Coulomb_Pot_Real_I, Coulomb_Pot_k(Nr_atoms),dq_dv(Nr_atoms) + real(dp) :: Coulomb_Pot_Real_dq_v(Nr_atoms), Coulomb_Pot_dq_v(Nr_atoms) + real(dp) :: Coulomb_Force_Real_I(3),Coulomb_Force_k(3,Nr_atoms) + real(dp) :: dq_v(Nr_atoms) + real(dp), intent(inout) :: KRes(Nr_atoms) + integer :: I,J,K,It + integer, intent(out) :: L + real(dp) :: Fel, proj_tmp + real(dp) :: vi(Nr_atoms,LMAX), fi(Nr_atoms,LMAX), v(Nr_atoms) + real(dp) :: dr(Nr_Atoms), IdentRes(Nr_atoms) + real(dp), allocatable :: O(:,:),M(:,:),row1(:),row2(:),row_NA(:) + type(bml_matrix_t) :: KK0T_bml, K0Res_bml, Res_bml + + allocate(row1(HDIM)); allocate(row2(HDIM)); allocate(row_NA(Nr_atoms)) + call bml_zero_matrix("ellpack",bml_element_real,dp,Nr_atoms,Nr_atoms,KK0T_bml) + call bml_zero_matrix("ellpack",bml_element_real,dp,Nr_atoms,Nr_atoms,K0Res_bml) + call bml_zero_matrix("ellpack",bml_element_real,dp,Nr_atoms,Nr_atoms,Res_bml) + + ! K0Res = KK0*Res temporary for matrix-vector multiplication + call bml_transpose(KK0_bml,KK0T_bml) + call bml_set_row(Res_bml,1,Res,ONE*1e-14) + call bml_multiply(Res_bml,KK0T_bml,K0Res_bml,1.0_dp,0.0_dp,ONE*1e-14) call bml_get_row(K0Res_bml,1,row_NA) - dr = row_NA - fi(:,I) = dr - - L = I - allocate(O(L,L), M(L,L)) - do K = 1,L - do J = 1,L - O(K,J) = dot_product(fi(:,K),fi(:,J)) ! O_KJ = < fv_i(K) | fv_i(J) > see below Eq. (31) - enddo - enddo - call Invert(O,M,L) ! M = O^(-1) - IdentRes = 0.D0*K0Res - KRes = 0.D0 - do K = 1,L - do J = 1,L - proj_tmp = M(K,J)*dot_product(fi(:,J),K0Res) - IdentRes = IdentRes + proj_tmp*fi(:,K) - KRes = KRes + proj_tmp*vi(:,K) !! KRes becomes the rank-L approximate of (K0*J)^(-1)*K0*(q[n]-n) - enddo - enddo - Fel = norm2(IdentRes-K0Res)/norm2(IdentRes) !! RELATIVE RESIDUAL ERROR ESTIMATE Eq. (48) Ref. [*] - write(*,*) '# I, L, Fel = ',I,L,Fel !! Fel goes down with L - deallocate(O, M) + K0Res = row_NA + dr = K0Res + + I = 0 + Fel = 1.D0 + do while ((Fel > FelTol).AND.(I < (LMAX))) !! Fel = "Error" in Swedish + I = I + 1 + vi(:,I) = dr/norm2(dr) + do J = 1,I-1 + vi(:,I) = vi(:,I) - dot_product(vi(:,I),vi(:,J))*vi(:,J) !! Orthogonalized v_i as in Eq. (42) Ref. [*] + enddo + vi(:,I) = vi(:,I)/norm2(vi(:,I)) + v(:) = vi(:,I) ! v_i - enddo + dq_v = v - deallocate(row1);deallocate(row2);deallocate(row_NA) - call bml_deallocate(KK0T_bml) - call bml_deallocate(K0Res_bml) - call bml_deallocate(Res_bml) - call bml_deallocate(ZT_bml) - call prg_timer_shutdown() + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(J,Coulomb_Pot_Real_I,Coulomb_Force_Real_I) + do J = 1,Nr_atoms + call Ewald_Real_Space(Coulomb_Pot_Real_I,Coulomb_Force_Real_I,J,RX,RY,RZ,LBox, & + dq_v,Hubbard_U,Element_Type,Nr_atoms,Coulomb_acc,TIMERATIO,nnRx,nnRy,nnRz,nrnnlist,nnType,HDIM,Max_Nr_Neigh) + Coulomb_Pot_Real(J) = Coulomb_Pot_Real_I + enddo + !$OMP END PARALLEL DO - end subroutine prg_kernel_multirank_latte + call Ewald_k_Space(Coulomb_Pot_k,Coulomb_Force_k,RX,RY,RZ,LBox,dq_v,Nr_atoms,Coulomb_acc,TIMERATIO,Max_Nr_Neigh) + Coulomb_Pot_dq_v = Coulomb_Pot_Real+Coulomb_Pot_k - ! Above routine but for development code - subroutine prg_kernel_multirank(KRes,KK0_bml,Res,FelTol,L,LMAX,HO_bml,mu,beta,RX,RY,RZ,LBox,Hubbard_U,Element_Type, & - Nr_atoms,HDIM,Max_Nr_Neigh,Coulomb_acc,TIMERATIO,nnRx,nnRy,nnRz,nrnnlist,nnType,H_INDEX_START,H_INDEX_END, & - S_bml,Z_bml,Nocc,Znuc,Inv_bml,H1_bml,X_bml,Y_bml,DO_bml,D1_bml,m_rec,threshold) + row1 = 0.0_dp + do J = 1,HDIM + call bml_set_row(H1_bml,J,row1,threshold) + enddo -!! Res = q[n] - n -!! KK0 is preconditioner -!! KRes rank-L approximation of (K0*J)^(-1)*K0*(q[n]-n) with (K0*J)^(-1) as in Eq. (41) in Ref. [*] + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(J,K) + do J = 1,Nr_atoms + do K = H_INDEX_START(J),H_INDEX_END(J) + row1(K) = Hubbard_U(J)*dq_v(J) + Coulomb_Pot_dq_v(J) + enddo + enddo + !$OMP END PARALLEL DO - implicit none - integer, intent(in) :: Nr_atoms, HDIM, Nocc,Max_Nr_Neigh,m_rec - real(dp), parameter :: ONE = 1.D0, TWO = 2.D0, ZERO = 0.D0 - real(dp), intent(in) :: Coulomb_acc, TIMERATIO, FelTol, beta, mu - real(dp), intent(in) :: RX(Nr_atoms), RY(Nr_atoms), RZ(Nr_atoms), LBox(3) - real(dp), intent(in) :: Res(Nr_atoms) - integer, intent(in) :: H_INDEX_START(Nr_atoms), H_INDEX_END(Nr_atoms) - real(dp), intent(in) :: Znuc(Nr_atoms), Hubbard_U(Nr_atoms) - type(bml_matrix_t), intent(in) :: HO_bml, S_bml, Z_bml, Inv_bml(m_rec), KK0_bml - real(dp) :: K0Res(Nr_atoms) - type(bml_matrix_t),intent(inout) :: H1_bml, X_bml,Y_bml,DO_bml,D1_bml - real(dp), intent(in) :: threshold - integer, intent(in) :: LMAX - character(10), intent(in) :: Element_Type(Nr_atoms) - integer, intent(in) :: nrnnlist(Nr_atoms), nnType(Nr_atoms,Max_Nr_Neigh) - real(dp), intent(in) :: nnRx(Nr_atoms,Max_Nr_Neigh) - real(dp), intent(in) :: nnRy(Nr_atoms,Max_Nr_Neigh), nnRz(Nr_atoms,Max_Nr_Neigh) - real(dp) :: Coulomb_Pot_Real(Nr_atoms), Coulomb_Pot(Nr_atoms) - real(dp) :: Coulomb_Pot_Real_I, Coulomb_Pot_k(Nr_atoms),dq_dv(Nr_atoms) - real(dp) :: Coulomb_Pot_Real_dq_v(Nr_atoms), Coulomb_Pot_dq_v(Nr_atoms) - real(dp) :: Coulomb_Force_Real_I(3),Coulomb_Force_k(3,Nr_atoms) - real(dp) :: dq_v(Nr_atoms) - real(dp), intent(inout) :: KRes(Nr_atoms) - integer :: I,J,K,It - integer, intent(out) :: L - real(dp) :: Fel, proj_tmp - real(dp) :: vi(Nr_atoms,LMAX), fi(Nr_atoms,LMAX), v(Nr_atoms) - real(dp) :: dr(Nr_Atoms), IdentRes(Nr_atoms) - real(dp), allocatable :: O(:,:),M(:,:),row1(:),row2(:),row_NA(:) - type(bml_matrix_t) :: KK0T_bml, K0Res_bml, Res_bml - - allocate(row1(HDIM)); allocate(row2(HDIM)); allocate(row_NA(Nr_atoms)) - call bml_zero_matrix("ellpack",bml_element_real,dp,Nr_atoms,Nr_atoms,KK0T_bml) - call bml_zero_matrix("ellpack",bml_element_real,dp,Nr_atoms,Nr_atoms,K0Res_bml) - call bml_zero_matrix("ellpack",bml_element_real,dp,Nr_atoms,Nr_atoms,Res_bml) - - ! K0Res = KK0*Res temporary for matrix-vector multiplication - call bml_transpose(KK0_bml,KK0T_bml) - call bml_set_row(Res_bml,1,Res,ONE*1e-14) - call bml_multiply(Res_bml,KK0T_bml,K0Res_bml,1.0_dp,0.0_dp,ONE*1e-14) - call bml_get_row(K0Res_bml,1,row_NA) - K0Res = row_NA - dr = K0Res - - I = 0 - Fel = 1.D0 - do while ((Fel > FelTol).AND.(I < (LMAX))) !! Fel = "Error" in Swedish - I = I + 1 - vi(:,I) = dr/norm2(dr) - do J = 1,I-1 - vi(:,I) = vi(:,I) - dot_product(vi(:,I),vi(:,J))*vi(:,J) !! Orthogonalized v_i as in Eq. (42) Ref. [*] - enddo - vi(:,I) = vi(:,I)/norm2(vi(:,I)) - v(:) = vi(:,I) ! v_i + call bml_set_diagonal(H1_bml,row1,threshold) + call bml_multiply(S_bml,H1_bml,D1_bml,1.0_dp,0.0_dp,threshold) + call bml_multiply(H1_bml,S_bml,D1_bml,0.5_dp,0.5_dp,threshold) + call bml_copy(D1_bml,H1_bml) - dq_v = v + call bml_transpose(Z_bml,X_bml) + call bml_multiply(X_bml,H1_bml,D1_bml,1.0_dp,0.0_dp,threshold) + call bml_multiply(D1_bml,Z_bml,H1_bml,1.0_dp,0.0_dp,threshold) - !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(J,Coulomb_Pot_Real_I,Coulomb_Force_Real_I) - do J = 1,Nr_atoms - call Ewald_Real_Space(Coulomb_Pot_Real_I,Coulomb_Force_Real_I,J,RX,RY,RZ,LBox, & - dq_v,Hubbard_U,Element_Type,Nr_atoms,Coulomb_acc,TIMERATIO,nnRx,nnRy,nnRz,nrnnlist,nnType,HDIM,Max_Nr_Neigh) - Coulomb_Pot_Real(J) = Coulomb_Pot_Real_I - enddo - !$OMP END PARALLEL DO - - call Ewald_k_Space(Coulomb_Pot_k,Coulomb_Force_k,RX,RY,RZ,LBox,dq_v,Nr_atoms,Coulomb_acc,TIMERATIO,Max_Nr_Neigh) - Coulomb_Pot_dq_v = Coulomb_Pot_Real+Coulomb_Pot_k - - row1 = 0.0_dp - do J = 1,HDIM - call bml_set_row(H1_bml,J,row1,threshold) - enddo + call prg_implicit_fermi_first_order_response(HO_bml,H1_bml,DO_bml,D1_bml,Inv_bml, & + m_rec, mu, beta, real(nocc,dp), threshold) - !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(J,K) - do J = 1,Nr_atoms - do K = H_INDEX_START(J),H_INDEX_END(J) - row1(K) = Hubbard_U(J)*dq_v(J) + Coulomb_Pot_dq_v(J) - enddo - enddo - !$OMP END PARALLEL DO - - call bml_set_diagonal(H1_bml,row1,threshold) - call bml_multiply(S_bml,H1_bml,D1_bml,1.0_dp,0.0_dp,threshold) - call bml_multiply(H1_bml,S_bml,D1_bml,0.5_dp,0.5_dp,threshold) - call bml_copy(D1_bml,H1_bml) - - call bml_transpose(Z_bml,X_bml) - call bml_multiply(X_bml,H1_bml,D1_bml,1.0_dp,0.0_dp,threshold) - call bml_multiply(D1_bml,Z_bml,H1_bml,1.0_dp,0.0_dp,threshold) - - call prg_implicit_fermi_first_order_response(HO_bml,H1_bml,DO_bml,D1_bml,Inv_bml, & - m_rec, mu, beta, real(nocc,dp), threshold) - - call bml_transpose(Z_bml,X_bml) - call bml_multiply(Z_bml,D1_bml,Y_bml,2.0_dp,0.0_dp,threshold) - call bml_multiply(Y_bml,X_bml,D1_bml,1.0_dp,0.0_dp,threshold) - - !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(It,K,row1,row2) - do It = 1, Nr_atoms - dq_dv(It) = 0 - do K = H_INDEX_START(It), H_INDEX_END(It) - call bml_get_row(S_bml,K,row1) - call bml_get_row(D1_bml,K,row2) - dq_dv(It) = dq_dv(It) + dot_product(row1,row2) + call bml_transpose(Z_bml,X_bml) + call bml_multiply(Z_bml,D1_bml,Y_bml,2.0_dp,0.0_dp,threshold) + call bml_multiply(Y_bml,X_bml,D1_bml,1.0_dp,0.0_dp,threshold) + + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(It,K,row1,row2) + do It = 1, Nr_atoms + dq_dv(It) = 0 + do K = H_INDEX_START(It), H_INDEX_END(It) + call bml_get_row(S_bml,K,row1) + call bml_get_row(D1_bml,K,row2) + dq_dv(It) = dq_dv(It) + dot_product(row1,row2) + enddo enddo - enddo - !$OMP END PARALLEL DO + !$OMP END PARALLEL DO + + dr = dq_dv - v + ! fi = K0(dq_dv - v) + call bml_set_row(Res_bml,1,dr,threshold) + call bml_multiply(Res_bml,KK0T_bml,K0Res_bml,1.0_dp,0.0_dp,threshold) + call bml_get_row(K0Res_bml,1,row_NA) + dr = row_NA + fi(:,I) = dr + + L = I + allocate(O(L,L), M(L,L)) + do K = 1,L + do J = 1,L + O(K,J) = dot_product(fi(:,K),fi(:,J)) ! O_KJ = < fv_i(K) | fv_i(J) > see below Eq. (31) + enddo + enddo + call Invert(O,M,L) ! M = O^(-1) + IdentRes = 0.D0*K0Res + KRes = 0.D0 + do K = 1,L + do J = 1,L + proj_tmp = M(K,J)*dot_product(fi(:,J),K0Res) + IdentRes = IdentRes + proj_tmp*fi(:,K) + KRes = KRes + proj_tmp*vi(:,K) !! KRes becomes the rank-L approximate of (K0*J)^(-1)*K0*(q[n]-n) + enddo + enddo + Fel = norm2(IdentRes-K0Res)/norm2(IdentRes) !! RELATIVE RESIDUAL ERROR ESTIMATE Eq. (48) Ref. [*] + write(*,*) '# I, L, Fel = ',I,L,Fel !! Fel goes down with L + deallocate(O, M) - dr = dq_dv - v - ! fi = K0(dq_dv - v) - call bml_set_row(Res_bml,1,dr,threshold) - call bml_multiply(Res_bml,KK0T_bml,K0Res_bml,1.0_dp,0.0_dp,threshold) - call bml_get_row(K0Res_bml,1,row_NA) - dr = row_NA - fi(:,I) = dr - - L = I - allocate(O(L,L), M(L,L)) - do K = 1,L - do J = 1,L - O(K,J) = dot_product(fi(:,K),fi(:,J)) ! O_KJ = < fv_i(K) | fv_i(J) > see below Eq. (31) - enddo enddo - call Invert(O,M,L) ! M = O^(-1) - IdentRes = 0.D0*K0Res - KRes = 0.D0 - do K = 1,L - do J = 1,L - proj_tmp = M(K,J)*dot_product(fi(:,J),K0Res) - IdentRes = IdentRes + proj_tmp*fi(:,K) - KRes = KRes + proj_tmp*vi(:,K) !! KRes becomes the rank-L approximate of (K0*J)^(-1)*K0*(q[n]-n) - enddo - enddo - Fel = norm2(IdentRes-K0Res)/norm2(IdentRes) !! RELATIVE RESIDUAL ERROR ESTIMATE Eq. (48) Ref. [*] - write(*,*) '# I, L, Fel = ',I,L,Fel !! Fel goes down with L - deallocate(O, M) - - enddo - deallocate(row1);deallocate(row2);deallocate(row_NA) - call bml_deallocate(KK0T_bml) - call bml_deallocate(K0Res_bml) - call bml_deallocate(Res_bml) + deallocate(row1);deallocate(row2);deallocate(row_NA) + call bml_deallocate(KK0T_bml) + call bml_deallocate(K0Res_bml) + call bml_deallocate(Res_bml) end subroutine prg_kernel_multirank -!> Compute full inverse Jacobian of q[n]-n (for LATTE) -!! \param KK The inverse Jacobian. -!! \param DO_bml Orthogonalized density matrix. -!! \param mu0 The chemical potiential. -!! \param RXYZ Nuclear coordinates. -!! \param Box Box dimensions. -!! \param Hubbard_U Hubbard U list. -!! \param Element_Pointer List to keep track of elements. -!! \param Nr_atoms The number of atoms. -!! \param HDIM Hamiltonian matrix dimension. -!! \param Max_Nr_Neigh Max neighbours for Ewald. -!! \param Coulomb_acc Coulomb accuracy -!! \param Hinxlist List to keep track of atomic positions in the Hamiltonian. -!! \param S_bml The S matrix. -!! \param Z_bml, The Z matrix. -!! \param Inv_bml, Inverses generated by prg_implicit_fermi_save_inverse. -!! \param HO_bml, Orthogonalized Hamiltonian matrix. -!! \param D1_bml, H1_bml, Y_bml, X_bml Auxillary matrices. -!! \param Nocc Occupation. -!! \param m_rec Number of recursion steps. -!! \param threshold Threshold value for matrix truncation. -!! \param beta Scaled inverse temperature. -!! \param Nr_elem Number of elements in Hubbard list. -subroutine prg_full_kernel_latte(KK,DO_bml,mu0,RXYZ,Box,Hubbard_U, & -Element_Pointer, Nr_atoms,HDIM, Max_Nr_Neigh,Coulomb_acc, & -Hinxlist,S_bml,Z_bml,Inv_bml,D1_bml,H1_bml,HO_bml,Y_bml,X_bml, & -Nocc,m_rec,threshold,beta,Nr_elem) - -use bml - -implicit none -integer, parameter :: PREC = 8, dp = kind(1.0d0) -integer, intent(in) :: Nr_atoms, Nr_elem,HDIM, Nocc, Max_Nr_Neigh,m_rec -real(PREC), parameter :: ONE = 1.D0, TWO = 2.D0, ZERO = 0.D0 -real(PREC), parameter :: kB = 8.61739d-5 ! eV/K, kB = 6.33366256e-6 Ry/K, kB = 3.166811429e-6 Ha/K -real(PREC), intent(in) :: Coulomb_acc, threshold,beta -real(PREC) :: v(Nr_atoms) -real(PREC), intent(in) :: RXYZ(3,Nr_atoms),Box(3,3) -integer, intent(in) :: Hinxlist(HDIM) -real(PREC), intent(in) :: Hubbard_U(Nr_elem) -type(bml_matrix_t), intent(in) :: Inv_bml(m_rec) -type(bml_matrix_t), intent(in) :: S_bml,Z_bml,HO_bml -type(bml_matrix_t),intent(inout) :: H1_bml,DO_bml,D1_bml,Y_bml,X_bml -real(PREC), intent(inout) :: mu0 -integer, intent(in) :: Element_Pointer(Nr_atoms) -real(PREC) :: Coulomb_Pot_Real(Nr_atoms), Coulomb_Pot(Nr_atoms) -real(PREC) :: Coulomb_Pot_Real_I, Coulomb_Pot_k(Nr_atoms) -real(PREC) :: Coulomb_Pot_Real_dq_v(Nr_atoms), Coulomb_Pot_dq_v(Nr_atoms) -real(PREC) :: dq_v(Nr_atoms) -real(PREC) :: dq_dv(Nr_atoms), err,tol -real(PREC), intent(out) :: KK(Nr_atoms,Nr_atoms) -integer :: I,J,K, ITER, mm,It,N,MN -real(PREC),allocatable :: row1(:),row2(:),JJ(:,:) -type(bml_matrix_t) :: ZT_bml, R_bml, JJI_bml, JJ_bml, tmp_bml -character(20) :: bml_type - - bml_type = bml_get_type(HO_bml) - N = bml_get_N(HO_bml) - MN = bml_get_M(HO_bml) - call bml_zero_matrix(bml_type,bml_element_real,dp,N,MN,ZT_bml) - call bml_transpose(Z_bml,ZT_bml) - allocate(row1(HDIM)); allocate(row2(HDIM)); allocate(JJ(Nr_atoms,Nr_atoms)) - Coulomb_Pot_dq_v = ZERO - Coulomb_Pot_k = ZERO - dq_v = ZERO - JJ = ZERO - KK = ZERO - - do J = 1,Nr_atoms - dq_v(J) = ONE - !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(I,Coulomb_Pot_Real_I) - do I = 1,Nr_atoms + !> Compute full inverse Jacobian of q[n]-n (for LATTE) + !! \param KK The inverse Jacobian. + !! \param DO_bml Orthogonalized density matrix. + !! \param mu0 The chemical potiential. + !! \param RXYZ Nuclear coordinates. + !! \param Box Box dimensions. + !! \param Hubbard_U Hubbard U list. + !! \param Element_Pointer List to keep track of elements. + !! \param Nr_atoms The number of atoms. + !! \param HDIM Hamiltonian matrix dimension. + !! \param Max_Nr_Neigh Max neighbours for Ewald. + !! \param Coulomb_acc Coulomb accuracy + !! \param Hinxlist List to keep track of atomic positions in the Hamiltonian. + !! \param S_bml The S matrix. + !! \param Z_bml, The Z matrix. + !! \param Inv_bml, Inverses generated by prg_implicit_fermi_save_inverse. + !! \param HO_bml, Orthogonalized Hamiltonian matrix. + !! \param D1_bml, H1_bml, Y_bml, X_bml Auxillary matrices. + !! \param Nocc Occupation. + !! \param m_rec Number of recursion steps. + !! \param threshold Threshold value for matrix truncation. + !! \param beta Scaled inverse temperature. + !! \param Nr_elem Number of elements in Hubbard list. + subroutine prg_full_kernel_latte(KK,DO_bml,mu0,RXYZ,Box,Hubbard_U, & + Element_Pointer, Nr_atoms,HDIM, Max_Nr_Neigh,Coulomb_acc, & + Hinxlist,S_bml,Z_bml,Inv_bml,D1_bml,H1_bml,HO_bml,Y_bml,X_bml, & + Nocc,m_rec,threshold,beta,Nr_elem) + + use bml + + implicit none + integer, parameter :: PREC = 8, dp = kind(1.0d0) + integer, intent(in) :: Nr_atoms, Nr_elem,HDIM, Nocc, Max_Nr_Neigh,m_rec + real(PREC), parameter :: ONE = 1.D0, TWO = 2.D0, ZERO = 0.D0 + real(PREC), parameter :: kB = 8.61739d-5 ! eV/K, kB = 6.33366256e-6 Ry/K, kB = 3.166811429e-6 Ha/K + real(PREC), intent(in) :: Coulomb_acc, threshold,beta + real(PREC) :: v(Nr_atoms) + real(PREC), intent(in) :: RXYZ(3,Nr_atoms),Box(3,3) + integer, intent(in) :: Hinxlist(HDIM) + real(PREC), intent(in) :: Hubbard_U(Nr_elem) + type(bml_matrix_t), intent(in) :: Inv_bml(m_rec) + type(bml_matrix_t), intent(in) :: S_bml,Z_bml,HO_bml + type(bml_matrix_t),intent(inout) :: H1_bml,DO_bml,D1_bml,Y_bml,X_bml + real(PREC), intent(inout) :: mu0 + integer, intent(in) :: Element_Pointer(Nr_atoms) + real(PREC) :: Coulomb_Pot_Real(Nr_atoms), Coulomb_Pot(Nr_atoms) + real(PREC) :: Coulomb_Pot_Real_I, Coulomb_Pot_k(Nr_atoms) + real(PREC) :: Coulomb_Pot_Real_dq_v(Nr_atoms), Coulomb_Pot_dq_v(Nr_atoms) + real(PREC) :: dq_v(Nr_atoms) + real(PREC) :: dq_dv(Nr_atoms), err,tol + real(PREC), intent(out) :: KK(Nr_atoms,Nr_atoms) + integer :: I,J,K, ITER, mm,It,N,MN + real(PREC),allocatable :: row1(:),row2(:),JJ(:,:) + type(bml_matrix_t) :: ZT_bml, R_bml, JJI_bml, JJ_bml, tmp_bml + character(20) :: bml_type + + bml_type = bml_get_type(HO_bml) + N = bml_get_N(HO_bml) + MN = bml_get_M(HO_bml) + call bml_zero_matrix(bml_type,bml_element_real,dp,N,MN,ZT_bml) + call bml_transpose(Z_bml,ZT_bml) + allocate(row1(HDIM)); allocate(row2(HDIM)); allocate(JJ(Nr_atoms,Nr_atoms)) + Coulomb_Pot_dq_v = ZERO + Coulomb_Pot_k = ZERO + dq_v = ZERO + JJ = ZERO + KK = ZERO + + do J = 1,Nr_atoms + dq_v(J) = ONE + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(I,Coulomb_Pot_Real_I) + do I = 1,Nr_atoms call Ewald_Real_Space_Single_latte(Coulomb_Pot_Real_I,I,RXYZ,Box,Nr_elem, & - dq_v,J,Hubbard_U,Element_Pointer,Nr_atoms,Coulomb_acc,HDIM,Max_Nr_Neigh) + dq_v,J,Hubbard_U,Element_Pointer,Nr_atoms,Coulomb_acc,HDIM,Max_Nr_Neigh) Coulomb_Pot_Real(I) = Coulomb_Pot_Real_I - enddo - !$OMP END PARALLEL DO - call Ewald_k_Space_latte(Coulomb_Pot_k,RXYZ,Box,dq_v,Nr_atoms,Coulomb_acc, & - Max_Nr_Neigh) - Coulomb_Pot_dq_v = Coulomb_Pot_Real+Coulomb_Pot_k - - call bml_deallocate(H1_bml) - call bml_zero_matrix(bml_type,bml_element_real,dp,N,MN,H1_bml) - - !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(I,K) - do I = 1,Nr_atoms-1 - do K = Hinxlist(I)+1,Hinxlist(I+1) - row1(K) = Hubbard_U(Element_Pointer(I))*dq_v(I) + Coulomb_Pot_dq_v(I) - enddo - enddo - !$OMP END PARALLEL DO - do K = Hinxlist(Nr_atoms)+1,HDIM - row1(K) = Hubbard_U(Element_Pointer(Nr_atoms))*dq_v(Nr_atoms) + Coulomb_Pot_dq_v(Nr_atoms) - enddo + enddo + !$OMP END PARALLEL DO + call Ewald_k_Space_latte(Coulomb_Pot_k,RXYZ,Box,dq_v,Nr_atoms,Coulomb_acc, & + Max_Nr_Neigh) + Coulomb_Pot_dq_v = Coulomb_Pot_Real+Coulomb_Pot_k + + call bml_deallocate(H1_bml) + call bml_zero_matrix(bml_type,bml_element_real,dp,N,MN,H1_bml) + + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(I,K) + do I = 1,Nr_atoms-1 + do K = Hinxlist(I)+1,Hinxlist(I+1) + row1(K) = Hubbard_U(Element_Pointer(I))*dq_v(I) + Coulomb_Pot_dq_v(I) + enddo + enddo + !$OMP END PARALLEL DO + do K = Hinxlist(Nr_atoms)+1,HDIM + row1(K) = Hubbard_U(Element_Pointer(Nr_atoms))*dq_v(Nr_atoms) + Coulomb_Pot_dq_v(Nr_atoms) + enddo - call bml_set_diagonal(H1_bml,row1,threshold) - call bml_multiply(S_bml,H1_bml,X_bml,1.0_dp,0.0_dp,threshold) - call bml_multiply(H1_bml,S_bml,X_bml,0.5_dp,0.5_dp,threshold) - call bml_copy(X_bml,H1_bml) + call bml_set_diagonal(H1_bml,row1,threshold) + call bml_multiply(S_bml,H1_bml,X_bml,1.0_dp,0.0_dp,threshold) + call bml_multiply(H1_bml,S_bml,X_bml,0.5_dp,0.5_dp,threshold) + call bml_copy(X_bml,H1_bml) - call bml_multiply(ZT_bml,H1_bml,X_bml,1.0_dp,0.0_dp,threshold) - call bml_multiply(X_bml,Z_bml,H1_bml,1.0_dp,0.0_dp,threshold) + call bml_multiply(ZT_bml,H1_bml,X_bml,1.0_dp,0.0_dp,threshold) + call bml_multiply(X_bml,Z_bml,H1_bml,1.0_dp,0.0_dp,threshold) - call prg_implicit_fermi_first_order_response(HO_bml,H1_bml,DO_bml,D1_bml,Inv_bml, & - m_rec,mu0,beta,real(nocc,PREC),threshold) + call prg_implicit_fermi_first_order_response(HO_bml,H1_bml,DO_bml,D1_bml,Inv_bml, & + m_rec,mu0,beta,real(nocc,PREC),threshold) - call bml_multiply(Z_bml,D1_bml,Y_bml,2.0_dp,0.0_dp,threshold) - call bml_multiply(Y_bml,ZT_bml,D1_bml,1.0_dp,0.0_dp,threshold) + call bml_multiply(Z_bml,D1_bml,Y_bml,2.0_dp,0.0_dp,threshold) + call bml_multiply(Y_bml,ZT_bml,D1_bml,1.0_dp,0.0_dp,threshold) - call bml_multiply(D1_bml,S_bml,X_bml,1.0_dp,0.0_dp,threshold) - call bml_get_diagonal(X_bml,row1) - !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(It,K) - do It = 1, Nr_atoms-1 - dq_dv(It) = 0 - do K = Hinxlist(It)+1,Hinxlist(It+1) + call bml_multiply(D1_bml,S_bml,X_bml,1.0_dp,0.0_dp,threshold) + call bml_get_diagonal(X_bml,row1) + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(It,K) + do It = 1, Nr_atoms-1 + dq_dv(It) = 0 + do K = Hinxlist(It)+1,Hinxlist(It+1) dq_dv(It) = dq_dv(It) + row1(K) - enddo - JJ(It,J) = dq_dv(It) - enddo - !$OMP END PARALLEL DO - dq_dv(Nr_atoms) = 0 - do K = Hinxlist(Nr_atoms)+1,HDIM - dq_dv(Nr_atoms) = dq_dv(Nr_atoms) + row1(K) + enddo + JJ(It,J) = dq_dv(It) + enddo + !$OMP END PARALLEL DO + dq_dv(Nr_atoms) = 0 + do K = Hinxlist(Nr_atoms)+1,HDIM + dq_dv(Nr_atoms) = dq_dv(Nr_atoms) + row1(K) + enddo + JJ(Nr_atoms,J) = dq_dv(Nr_atoms) + dq_v = ZERO enddo - JJ(Nr_atoms,J) = dq_dv(Nr_atoms) + + do I = 1,Nr_atoms + JJ(I,I) = JJ(I,I) - ONE + enddo + + call Invert(JJ,KK,Nr_atoms) + + deallocate(row1); deallocate(row2); deallocate(JJ) + call bml_deallocate(ZT_bml) + + end subroutine prg_full_kernel_latte + + !> Compute full inverse Jacobian of q[n]-n (for development code) + !! \param KK The inverse Jacobian. + !! \param DO_bml Orthogonalized density matrix. + !! \param mu0 The chemical potiential. + !! \param RX,RY,RZ Nuclear coordinates. + !! \param Lbox Box dimensions. + !! \param Hubbard_U Hubbard U list. + !! \param Element_Type List to keep track of elements. + !! \param Nr_atoms The number of atoms. + !! \param HDIM Hamiltonian matrix dimension. + !! \param Max_Nr_Neigh Max neighbours for Ewald. + !! \param Coulomb_acc Coulomb accuracy + !! \param TIMERATIO Parameter for Ewald + !! \param nnRx,nnRy,nnRz Neighbour lists. + !! \param nrnnlist Number of neighbours list. + !! \param nnType Refers to original order of atoms. + !! \param H_INDEX_START, H_INDEX_END Lists to keep track of atomic positions in the Hamiltonian. + !! \param S_bml The S matrix. + !! \param Z_bml, The Z matrix. + !! \param Inv_bml, Inverses generated by prg_implicit_fermi_save_inverse. + !! \param HO_bml, Orthogonalized Hamiltonian matrix. + !! \param D1_bml, H1_bml, Y_bml, X_bml Auxillary matrices. + !! \param Nocc Occupation. + !! \param Znuc List of nuclear charges. + !! \param m_rec Number of recursion steps. + !! \param threshold Threshold value for matrix truncation. + !! \param beta Scaled inverse temperature. + !! \param diagonal Auxillary vector. + + subroutine prg_full_kernel(KK,DO_bml,mu0,RX,RY,RZ,LBox,Hubbard_U,Element_Type, & + Nr_atoms,HDIM, Max_Nr_Neigh,Coulomb_acc,TIMERATIO,nnRx,nnRy,nnRz, & + nrnnlist,nnType,H_INDEX_START,H_INDEX_END,S_bml,Z_bml,Inv_bml,D1_bml,H1_bml,HO_bml,Y_bml,X_bml, & + Nocc,Znuc,m_rec,threshold,beta,diagonal) + + use bml + + implicit none + integer, parameter :: PREC = 8, dp = kind(1.0d0) + integer, intent(in) :: Nr_atoms, HDIM, Nocc, Max_Nr_Neigh,m_rec + real(PREC), parameter :: ONE = 1.D0, TWO = 2.D0, ZERO = 0.D0 + real(PREC), parameter :: kB = 8.61739d-5 ! eV/K, kB = 6.33366256e-6 Ry/K, kB = 3.166811429e-6 Ha/K + real(PREC), intent(in) :: Coulomb_acc, TIMERATIO,threshold,beta + real(PREC) :: v(Nr_atoms) + real(PREC), intent(in) :: RX(Nr_atoms), RY(Nr_atoms), RZ(Nr_atoms), LBox(3) + integer, intent(in) :: H_INDEX_START(Nr_atoms), H_INDEX_END(Nr_atoms) + real(PREC), intent(in) :: Znuc(Nr_atoms), Hubbard_U(Nr_atoms) + type(bml_matrix_t), intent(in) :: Inv_bml(m_rec) + type(bml_matrix_t), intent(in) :: S_bml,Z_bml,HO_bml + type(bml_matrix_t),intent(inout) :: H1_bml,DO_bml,D1_bml,Y_bml,X_bml + real(PREC), intent(inout) :: mu0, diagonal(HDIM) + character(10), intent(in) :: Element_Type(Nr_atoms) + integer, intent(in) :: nrnnlist(Nr_atoms), nnType(Nr_atoms,Max_Nr_Neigh) + real(PREC), intent(in) :: nnRx(Nr_atoms,Max_Nr_Neigh) + real(PREC), intent(in) :: nnRy(Nr_atoms,Max_Nr_Neigh), nnRz(Nr_atoms,Max_Nr_Neigh) + real(PREC) :: Coulomb_Pot_Real(Nr_atoms), Coulomb_Pot(Nr_atoms) + real(PREC) :: Coulomb_Pot_Real_I, Coulomb_Pot_k(Nr_atoms) + real(PREC) :: Coulomb_Pot_Real_dq_v(Nr_atoms), Coulomb_Pot_dq_v(Nr_atoms) + real(PREC) :: Coulomb_Force_Real_I(3),Coulomb_Force_k(3,Nr_atoms) + real(PREC) :: dq_v(Nr_atoms) + real(PREC) :: dq_dv(Nr_atoms) + real(PREC), intent(out) :: KK(Nr_atoms,Nr_atoms) + integer :: I,J,K, ITER, mm,It + real(PREC),allocatable :: row1(:),row2(:),JJ(:,:) + + allocate(row1(HDIM)); allocate(row2(HDIM)); allocate(JJ(Nr_atoms,Nr_atoms)) + Coulomb_Pot_dq_v = ZERO + Coulomb_Pot_k = ZERO dq_v = ZERO - enddo - - do I = 1,Nr_atoms - JJ(I,I) = JJ(I,I) - ONE - enddo - - call Invert(JJ,KK,Nr_atoms) - - deallocate(row1); deallocate(row2); deallocate(JJ) - call bml_deallocate(ZT_bml) - -end subroutine prg_full_kernel_latte - -!> Compute full inverse Jacobian of q[n]-n (for development code) -!! \param KK The inverse Jacobian. -!! \param DO_bml Orthogonalized density matrix. -!! \param mu0 The chemical potiential. -!! \param RX,RY,RZ Nuclear coordinates. -!! \param Lbox Box dimensions. -!! \param Hubbard_U Hubbard U list. -!! \param Element_Type List to keep track of elements. -!! \param Nr_atoms The number of atoms. -!! \param HDIM Hamiltonian matrix dimension. -!! \param Max_Nr_Neigh Max neighbours for Ewald. -!! \param Coulomb_acc Coulomb accuracy -!! \param TIMERATIO Parameter for Ewald -!! \param nnRx,nnRy,nnRz Neighbour lists. -!! \param nrnnlist Number of neighbours list. -!! \param nnType Refers to original order of atoms. -!! \param H_INDEX_START, H_INDEX_END Lists to keep track of atomic positions in the Hamiltonian. -!! \param S_bml The S matrix. -!! \param Z_bml, The Z matrix. -!! \param Inv_bml, Inverses generated by prg_implicit_fermi_save_inverse. -!! \param HO_bml, Orthogonalized Hamiltonian matrix. -!! \param D1_bml, H1_bml, Y_bml, X_bml Auxillary matrices. -!! \param Nocc Occupation. -!! \param Znuc List of nuclear charges. -!! \param m_rec Number of recursion steps. -!! \param threshold Threshold value for matrix truncation. -!! \param beta Scaled inverse temperature. -!! \param diagonal Auxillary vector. - -subroutine prg_full_kernel(KK,DO_bml,mu0,RX,RY,RZ,LBox,Hubbard_U,Element_Type, & -Nr_atoms,HDIM, Max_Nr_Neigh,Coulomb_acc,TIMERATIO,nnRx,nnRy,nnRz, & -nrnnlist,nnType,H_INDEX_START,H_INDEX_END,S_bml,Z_bml,Inv_bml,D1_bml,H1_bml,HO_bml,Y_bml,X_bml, & -Nocc,Znuc,m_rec,threshold,beta,diagonal) - -use bml - -implicit none -integer, parameter :: PREC = 8, dp = kind(1.0d0) -integer, intent(in) :: Nr_atoms, HDIM, Nocc, Max_Nr_Neigh,m_rec -real(PREC), parameter :: ONE = 1.D0, TWO = 2.D0, ZERO = 0.D0 -real(PREC), parameter :: kB = 8.61739d-5 ! eV/K, kB = 6.33366256e-6 Ry/K, kB = 3.166811429e-6 Ha/K -real(PREC), intent(in) :: Coulomb_acc, TIMERATIO,threshold,beta -real(PREC) :: v(Nr_atoms) -real(PREC), intent(in) :: RX(Nr_atoms), RY(Nr_atoms), RZ(Nr_atoms), LBox(3) -integer, intent(in) :: H_INDEX_START(Nr_atoms), H_INDEX_END(Nr_atoms) -real(PREC), intent(in) :: Znuc(Nr_atoms), Hubbard_U(Nr_atoms) -type(bml_matrix_t), intent(in) :: Inv_bml(m_rec) -type(bml_matrix_t), intent(in) :: S_bml,Z_bml,HO_bml -type(bml_matrix_t),intent(inout) :: H1_bml,DO_bml,D1_bml,Y_bml,X_bml -real(PREC), intent(inout) :: mu0, diagonal(HDIM) -character(10), intent(in) :: Element_Type(Nr_atoms) -integer, intent(in) :: nrnnlist(Nr_atoms), nnType(Nr_atoms,Max_Nr_Neigh) -real(PREC), intent(in) :: nnRx(Nr_atoms,Max_Nr_Neigh) -real(PREC), intent(in) :: nnRy(Nr_atoms,Max_Nr_Neigh), nnRz(Nr_atoms,Max_Nr_Neigh) -real(PREC) :: Coulomb_Pot_Real(Nr_atoms), Coulomb_Pot(Nr_atoms) -real(PREC) :: Coulomb_Pot_Real_I, Coulomb_Pot_k(Nr_atoms) -real(PREC) :: Coulomb_Pot_Real_dq_v(Nr_atoms), Coulomb_Pot_dq_v(Nr_atoms) -real(PREC) :: Coulomb_Force_Real_I(3),Coulomb_Force_k(3,Nr_atoms) -real(PREC) :: dq_v(Nr_atoms) -real(PREC) :: dq_dv(Nr_atoms) -real(PREC), intent(out) :: KK(Nr_atoms,Nr_atoms) -integer :: I,J,K, ITER, mm,It -real(PREC),allocatable :: row1(:),row2(:),JJ(:,:) - - allocate(row1(HDIM)); allocate(row2(HDIM)); allocate(JJ(Nr_atoms,Nr_atoms)) - Coulomb_Pot_dq_v = ZERO - Coulomb_Pot_k = ZERO - dq_v = ZERO - JJ = ZERO - KK = ZERO - - do J = 1,Nr_atoms - dq_v(J) = ONE - !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(I,Coulomb_Pot_Real_I) - do I = 1,Nr_atoms + JJ = ZERO + KK = ZERO + + do J = 1,Nr_atoms + dq_v(J) = ONE + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(I,Coulomb_Pot_Real_I) + do I = 1,Nr_atoms call Ewald_Real_Space_Single(Coulomb_Pot_Real_I,Coulomb_Force_Real_I,I,RX,RY,RZ,LBox, & - dq_v,J,Hubbard_U,Element_Type,Nr_atoms,Coulomb_acc,TIMERATIO,HDIM,Max_Nr_Neigh) + dq_v,J,Hubbard_U,Element_Type,Nr_atoms,Coulomb_acc,TIMERATIO,HDIM,Max_Nr_Neigh) Coulomb_Pot_Real(I) = Coulomb_Pot_Real_I - enddo - !$OMP END PARALLEL DO - call Ewald_k_Space(Coulomb_Pot_k,Coulomb_Force_k,RX,RY,RZ,LBox,dq_v,Nr_atoms,Coulomb_acc, & - TIMERATIO,Max_Nr_Neigh) - Coulomb_Pot_dq_v = Coulomb_Pot_Real+Coulomb_Pot_k - - diagonal = 0.0_dp - do I = 1,HDIM - call bml_set_row(H1_bml,I,diagonal,threshold) - enddo - - !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(I,K) - do I = 1,Nr_atoms - do K = H_INDEX_START(I),H_INDEX_END(I) - diagonal(K) = Hubbard_U(I)*dq_v(I) + Coulomb_Pot_dq_v(I) - enddo - enddo - !$OMP END PARALLEL DO - - call bml_set_diagonal(H1_bml,diagonal,threshold) - call bml_multiply(S_bml,H1_bml,D1_bml,1.0_dp,0.0_dp,threshold) - call bml_multiply(H1_bml,S_bml,D1_bml,0.5_dp,0.5_dp,threshold) - call bml_copy(D1_bml,H1_bml) - - call bml_transpose(Z_bml,X_bml) - call bml_multiply(X_bml,H1_bml,D1_bml,1.0_dp,0.0_dp,threshold) - call bml_multiply(D1_bml,Z_bml,H1_bml,1.0_dp,0.0_dp,threshold) - - call prg_implicit_fermi_first_order_response(HO_bml,H1_bml,DO_bml,D1_bml,Inv_bml, & - m_rec,mu0,beta,real(nocc,PREC),threshold) - - call bml_transpose(Z_bml,X_bml) - call bml_multiply(Z_bml,D1_bml,Y_bml,2.0_dp,0.0_dp,threshold) - call bml_multiply(Y_bml,X_bml,D1_bml,1.0_dp,0.0_dp,threshold) - - do It = 1, Nr_atoms - dq_dv(It) = 0 - do K = H_INDEX_START(It), H_INDEX_END(It) + enddo + !$OMP END PARALLEL DO + call Ewald_k_Space(Coulomb_Pot_k,Coulomb_Force_k,RX,RY,RZ,LBox,dq_v,Nr_atoms,Coulomb_acc, & + TIMERATIO,Max_Nr_Neigh) + Coulomb_Pot_dq_v = Coulomb_Pot_Real+Coulomb_Pot_k + + diagonal = 0.0_dp + do I = 1,HDIM + call bml_set_row(H1_bml,I,diagonal,threshold) + enddo + + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(I,K) + do I = 1,Nr_atoms + do K = H_INDEX_START(I),H_INDEX_END(I) + diagonal(K) = Hubbard_U(I)*dq_v(I) + Coulomb_Pot_dq_v(I) + enddo + enddo + !$OMP END PARALLEL DO + + call bml_set_diagonal(H1_bml,diagonal,threshold) + call bml_multiply(S_bml,H1_bml,D1_bml,1.0_dp,0.0_dp,threshold) + call bml_multiply(H1_bml,S_bml,D1_bml,0.5_dp,0.5_dp,threshold) + call bml_copy(D1_bml,H1_bml) + + call bml_transpose(Z_bml,X_bml) + call bml_multiply(X_bml,H1_bml,D1_bml,1.0_dp,0.0_dp,threshold) + call bml_multiply(D1_bml,Z_bml,H1_bml,1.0_dp,0.0_dp,threshold) + + call prg_implicit_fermi_first_order_response(HO_bml,H1_bml,DO_bml,D1_bml,Inv_bml, & + m_rec,mu0,beta,real(nocc,PREC),threshold) + + call bml_transpose(Z_bml,X_bml) + call bml_multiply(Z_bml,D1_bml,Y_bml,2.0_dp,0.0_dp,threshold) + call bml_multiply(Y_bml,X_bml,D1_bml,1.0_dp,0.0_dp,threshold) + + do It = 1, Nr_atoms + dq_dv(It) = 0 + do K = H_INDEX_START(It), H_INDEX_END(It) call bml_get_row(S_bml,K,row1) call bml_get_row(D1_bml,K,row2) dq_dv(It) = dq_dv(It) + dot_product(row1,row2) - enddo - JJ(It,J) = dq_dv(It) + enddo + JJ(It,J) = dq_dv(It) + enddo + dq_v = ZERO enddo - dq_v = ZERO - enddo - do I = 1,Nr_atoms - JJ(I,I) = JJ(I,I) - ONE - enddo - call Invert(JJ,KK,Nr_atoms) - deallocate(row1); deallocate(row2); deallocate(JJ) + do I = 1,Nr_atoms + JJ(I,I) = JJ(I,I) - ONE + enddo + call Invert(JJ,KK,Nr_atoms) + deallocate(row1); deallocate(row2); deallocate(JJ) + + end subroutine prg_full_kernel + + + ! Compute the low-rank kernel matrix. (For development code) + subroutine prg_kernel_matrix_multirank(KRes,KK0_bml,Res,FelTol,L,LMAX,HO_bml,mu,beta,RX,RY,RZ,LBox,Hubbard_U,Element_Type, & + Nr_atoms,HDIM,Max_Nr_Neigh,Coulomb_acc,TIMERATIO,nnRx,nnRy,nnRz,nrnnlist,nnType,H_INDEX_START,H_INDEX_END, & + S_bml,Z_bml,Nocc,Znuc,Inv_bml,H1_bml,X_bml,Y_bml,DO_bml,D1_bml,m_rec,threshold) + + !! Res = q[n] - n + !! KK0 is preconditioner + !! KRes rank-L approximation of (K0*J)^(-1)*K0*(q[n]-n) with (K0*J)^(-1) as in Eq. (41) in Ref. [*] + + implicit none + integer, intent(in) :: Nr_atoms, HDIM, Nocc,Max_Nr_Neigh,m_rec + real(dp), parameter :: ONE = 1.D0, TWO = 2.D0, ZERO = 0.D0 + real(dp), intent(in) :: Coulomb_acc, TIMERATIO, FelTol, beta, mu + real(dp), intent(in) :: RX(Nr_atoms), RY(Nr_atoms), RZ(Nr_atoms), LBox(3) + real(dp), intent(in) :: Res(Nr_atoms) + integer, intent(in) :: H_INDEX_START(Nr_atoms), H_INDEX_END(Nr_atoms) + real(dp), intent(in) :: Znuc(Nr_atoms), Hubbard_U(Nr_atoms) + type(bml_matrix_t), intent(in) :: HO_bml, S_bml, Z_bml, Inv_bml(m_rec) + real(dp) :: K0Res(Nr_atoms) + type(bml_matrix_t),intent(inout) :: H1_bml, X_bml,Y_bml,DO_bml,D1_bml + type(bml_matrix_t),intent(inout) :: KK0_bml + real(dp), intent(in) :: threshold + integer, intent(in) :: LMAX + character(10), intent(in) :: Element_Type(Nr_atoms) + integer, intent(in) :: nrnnlist(Nr_atoms), nnType(Nr_atoms,Max_Nr_Neigh) + real(dp), intent(in) :: nnRx(Nr_atoms,Max_Nr_Neigh) + real(dp), intent(in) :: nnRy(Nr_atoms,Max_Nr_Neigh), nnRz(Nr_atoms,Max_Nr_Neigh) + real(dp) :: Coulomb_Pot_Real(Nr_atoms), Coulomb_Pot(Nr_atoms) + real(dp) :: Coulomb_Pot_Real_I, Coulomb_Pot_k(Nr_atoms),dq_dv(Nr_atoms) + real(dp) :: Coulomb_Pot_Real_dq_v(Nr_atoms), Coulomb_Pot_dq_v(Nr_atoms) + real(dp) :: Coulomb_Force_Real_I(3),Coulomb_Force_k(3,Nr_atoms) + real(dp) :: dq_v(Nr_atoms) + real(dp), intent(out) :: KRes(Nr_atoms) + integer :: I,J,K,It,col,row + integer, intent(out) :: L + real(dp) :: Fel, proj_tmp,elem + real(dp) :: vi(Nr_atoms,LMAX), fi(Nr_atoms,LMAX), v(Nr_atoms) + real(dp) :: dr(Nr_Atoms), IdentRes(Nr_atoms) + real(dp), allocatable :: O(:,:),M(:,:),row1(:),row2(:) + + allocate(row1(HDIM));allocate(row2(HDIM)); + + dr = Res + I = 0 + Fel = 1.D0 + do while ((Fel > FelTol).AND.(I < (LMAX))) !! Fel = "Error" in Swedish + I = I + 1 + vi(:,I) = dr/norm2(dr) + do J = 1,I-1 + vi(:,I) = vi(:,I) - dot_product(vi(:,I),vi(:,J))*vi(:,J) !! Orthogonalized v_i as in Eq. (42) Ref. [*] + enddo + vi(:,I) = vi(:,I)/norm2(vi(:,I)) + v(:) = vi(:,I) ! v_i -end subroutine prg_full_kernel + dq_v = v + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(I,Coulomb_Pot_Real_I,Coulomb_Force_Real_I) + do J = 1,Nr_atoms + call Ewald_Real_Space(Coulomb_Pot_Real_I,Coulomb_Force_Real_I,J,RX,RY,RZ,LBox, & + dq_v,Hubbard_U,Element_Type,Nr_atoms,Coulomb_acc,TIMERATIO,nnRx,nnRy,nnRz,nrnnlist,nnType,HDIM,Max_Nr_Neigh) + Coulomb_Pot_Real(J) = Coulomb_Pot_Real_I + enddo + !$OMP END PARALLEL DO -! Compute the low-rank kernel matrix. (For development code) - subroutine prg_kernel_matrix_multirank(KRes,KK0_bml,Res,FelTol,L,LMAX,HO_bml,mu,beta,RX,RY,RZ,LBox,Hubbard_U,Element_Type, & - Nr_atoms,HDIM,Max_Nr_Neigh,Coulomb_acc,TIMERATIO,nnRx,nnRy,nnRz,nrnnlist,nnType,H_INDEX_START,H_INDEX_END, & - S_bml,Z_bml,Nocc,Znuc,Inv_bml,H1_bml,X_bml,Y_bml,DO_bml,D1_bml,m_rec,threshold) + call Ewald_k_Space(Coulomb_Pot_k,Coulomb_Force_k,RX,RY,RZ,LBox,dq_v,Nr_atoms,Coulomb_acc,TIMERATIO,Max_Nr_Neigh) + Coulomb_Pot_dq_v = Coulomb_Pot_Real+Coulomb_Pot_k -!! Res = q[n] - n -!! KK0 is preconditioner -!! KRes rank-L approximation of (K0*J)^(-1)*K0*(q[n]-n) with (K0*J)^(-1) as in Eq. (41) in Ref. [*] + row1 = 0.0_dp + do J = 1,HDIM + call bml_set_row(H1_bml,J,row1,threshold) + enddo - implicit none - integer, intent(in) :: Nr_atoms, HDIM, Nocc,Max_Nr_Neigh,m_rec - real(dp), parameter :: ONE = 1.D0, TWO = 2.D0, ZERO = 0.D0 - real(dp), intent(in) :: Coulomb_acc, TIMERATIO, FelTol, beta, mu - real(dp), intent(in) :: RX(Nr_atoms), RY(Nr_atoms), RZ(Nr_atoms), LBox(3) - real(dp), intent(in) :: Res(Nr_atoms) - integer, intent(in) :: H_INDEX_START(Nr_atoms), H_INDEX_END(Nr_atoms) - real(dp), intent(in) :: Znuc(Nr_atoms), Hubbard_U(Nr_atoms) - type(bml_matrix_t), intent(in) :: HO_bml, S_bml, Z_bml, Inv_bml(m_rec) - real(dp) :: K0Res(Nr_atoms) - type(bml_matrix_t),intent(inout) :: H1_bml, X_bml,Y_bml,DO_bml,D1_bml - type(bml_matrix_t),intent(inout) :: KK0_bml - real(dp), intent(in) :: threshold - integer, intent(in) :: LMAX - character(10), intent(in) :: Element_Type(Nr_atoms) - integer, intent(in) :: nrnnlist(Nr_atoms), nnType(Nr_atoms,Max_Nr_Neigh) - real(dp), intent(in) :: nnRx(Nr_atoms,Max_Nr_Neigh) - real(dp), intent(in) :: nnRy(Nr_atoms,Max_Nr_Neigh), nnRz(Nr_atoms,Max_Nr_Neigh) - real(dp) :: Coulomb_Pot_Real(Nr_atoms), Coulomb_Pot(Nr_atoms) - real(dp) :: Coulomb_Pot_Real_I, Coulomb_Pot_k(Nr_atoms),dq_dv(Nr_atoms) - real(dp) :: Coulomb_Pot_Real_dq_v(Nr_atoms), Coulomb_Pot_dq_v(Nr_atoms) - real(dp) :: Coulomb_Force_Real_I(3),Coulomb_Force_k(3,Nr_atoms) - real(dp) :: dq_v(Nr_atoms) - real(dp), intent(out) :: KRes(Nr_atoms) - integer :: I,J,K,It,col,row - integer, intent(out) :: L - real(dp) :: Fel, proj_tmp,elem - real(dp) :: vi(Nr_atoms,LMAX), fi(Nr_atoms,LMAX), v(Nr_atoms) - real(dp) :: dr(Nr_Atoms), IdentRes(Nr_atoms) - real(dp), allocatable :: O(:,:),M(:,:),row1(:),row2(:) - - allocate(row1(HDIM));allocate(row2(HDIM)); - - dr = Res - I = 0 - Fel = 1.D0 - do while ((Fel > FelTol).AND.(I < (LMAX))) !! Fel = "Error" in Swedish - I = I + 1 - vi(:,I) = dr/norm2(dr) - do J = 1,I-1 - vi(:,I) = vi(:,I) - dot_product(vi(:,I),vi(:,J))*vi(:,J) !! Orthogonalized v_i as in Eq. (42) Ref. [*] - enddo - vi(:,I) = vi(:,I)/norm2(vi(:,I)) - v(:) = vi(:,I) ! v_i + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(J,K) + do J = 1,Nr_atoms + do K = H_INDEX_START(J),H_INDEX_END(J) + row1(K) = Hubbard_U(J)*dq_v(J) + Coulomb_Pot_dq_v(J) + enddo + enddo + !$OMP END PARALLEL DO - dq_v = v + call bml_set_diagonal(H1_bml,row1,threshold) + call bml_multiply(S_bml,H1_bml,D1_bml,1.0_dp,0.0_dp,threshold) + call bml_multiply(H1_bml,S_bml,D1_bml,0.5_dp,0.5_dp,threshold) + call bml_copy(D1_bml,H1_bml) - !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(I,Coulomb_Pot_Real_I,Coulomb_Force_Real_I) - do J = 1,Nr_atoms - call Ewald_Real_Space(Coulomb_Pot_Real_I,Coulomb_Force_Real_I,J,RX,RY,RZ,LBox, & - dq_v,Hubbard_U,Element_Type,Nr_atoms,Coulomb_acc,TIMERATIO,nnRx,nnRy,nnRz,nrnnlist,nnType,HDIM,Max_Nr_Neigh) - Coulomb_Pot_Real(J) = Coulomb_Pot_Real_I - enddo - !$OMP END PARALLEL DO - - call Ewald_k_Space(Coulomb_Pot_k,Coulomb_Force_k,RX,RY,RZ,LBox,dq_v,Nr_atoms,Coulomb_acc,TIMERATIO,Max_Nr_Neigh) - Coulomb_Pot_dq_v = Coulomb_Pot_Real+Coulomb_Pot_k - - row1 = 0.0_dp - do J = 1,HDIM - call bml_set_row(H1_bml,J,row1,threshold) - enddo + call bml_transpose(Z_bml,X_bml) + call bml_multiply(X_bml,H1_bml,D1_bml,1.0_dp,0.0_dp,threshold) + call bml_multiply(D1_bml,Z_bml,H1_bml,1.0_dp,0.0_dp,threshold) - !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(J,K) - do J = 1,Nr_atoms - do K = H_INDEX_START(J),H_INDEX_END(J) - row1(K) = Hubbard_U(J)*dq_v(J) + Coulomb_Pot_dq_v(J) - enddo - enddo - !$OMP END PARALLEL DO - - call bml_set_diagonal(H1_bml,row1,threshold) - call bml_multiply(S_bml,H1_bml,D1_bml,1.0_dp,0.0_dp,threshold) - call bml_multiply(H1_bml,S_bml,D1_bml,0.5_dp,0.5_dp,threshold) - call bml_copy(D1_bml,H1_bml) - - call bml_transpose(Z_bml,X_bml) - call bml_multiply(X_bml,H1_bml,D1_bml,1.0_dp,0.0_dp,threshold) - call bml_multiply(D1_bml,Z_bml,H1_bml,1.0_dp,0.0_dp,threshold) - - call prg_implicit_fermi_first_order_response(HO_bml,H1_bml,DO_bml,D1_bml,Inv_bml, & - m_rec, mu, beta, real(nocc,dp), threshold) - - call bml_transpose(Z_bml,X_bml) - call bml_multiply(Z_bml,D1_bml,Y_bml,2.0_dp,0.0_dp,threshold) - call bml_multiply(Y_bml,X_bml,D1_bml,1.0_dp,0.0_dp,threshold) - - !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(It,K,row1,row2) - do It = 1, Nr_atoms - dq_dv(It) = 0 - do K = H_INDEX_START(It), H_INDEX_END(It) - call bml_get_row(S_bml,K,row1) - call bml_get_row(D1_bml,K,row2) - dq_dv(It) = dq_dv(It) + dot_product(row1,row2) + call prg_implicit_fermi_first_order_response(HO_bml,H1_bml,DO_bml,D1_bml,Inv_bml, & + m_rec, mu, beta, real(nocc,dp), threshold) + + call bml_transpose(Z_bml,X_bml) + call bml_multiply(Z_bml,D1_bml,Y_bml,2.0_dp,0.0_dp,threshold) + call bml_multiply(Y_bml,X_bml,D1_bml,1.0_dp,0.0_dp,threshold) + + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(It,K,row1,row2) + do It = 1, Nr_atoms + dq_dv(It) = 0 + do K = H_INDEX_START(It), H_INDEX_END(It) + call bml_get_row(S_bml,K,row1) + call bml_get_row(D1_bml,K,row2) + dq_dv(It) = dq_dv(It) + dot_product(row1,row2) + enddo enddo - enddo - !$OMP END PARALLEL DO + !$OMP END PARALLEL DO - dr = dq_dv - v - fi(:,I) = dr + dr = dq_dv - v + fi(:,I) = dr - L = I - allocate(O(L,L), M(L,L)) - do K = 1,L - do J = 1,L - O(K,J) = dot_product(fi(:,K),fi(:,J)) ! O_KJ = < fv_i(K) | fv_i(J) > see below Eq. (31) - enddo - enddo - call Invert(O,M,L) ! M = O^(-1) - IdentRes = 0.D0*Res - KRes = 0.D0 - do K = 1,L - do J = 1,L - proj_tmp = M(K,J)*dot_product(fi(:,J),Res) - IdentRes = IdentRes + proj_tmp*fi(:,K) - KRes = KRes + proj_tmp*vi(:,K) !! KRes becomes the rank-L approximate of (J)^(-1)*(q[n]-n) - enddo + L = I + allocate(O(L,L), M(L,L)) + do K = 1,L + do J = 1,L + O(K,J) = dot_product(fi(:,K),fi(:,J)) ! O_KJ = < fv_i(K) | fv_i(J) > see below Eq. (31) + enddo + enddo + call Invert(O,M,L) ! M = O^(-1) + IdentRes = 0.D0*Res + KRes = 0.D0 + do K = 1,L + do J = 1,L + proj_tmp = M(K,J)*dot_product(fi(:,J),Res) + IdentRes = IdentRes + proj_tmp*fi(:,K) + KRes = KRes + proj_tmp*vi(:,K) !! KRes becomes the rank-L approximate of (J)^(-1)*(q[n]-n) + enddo + enddo + Fel = norm2(IdentRes-Res)/norm2(IdentRes) !! RELATIVE RESIDUAL ERROR ESTIMATE Eq. (48) Ref. [*] + write(*,*) '# I, L, Fel = ',I,L,Fel !! Fel goes down with L + + ! Does not work, need to normalize matrix + if ((Fel > FelTol).AND.(I < (LMAX))) then + deallocate(O, M) + else + do row = 1,Nr_atoms + do col = 1,Nr_atoms + elem = 0.0 + do K = 1,L + do J = 1,L + elem = elem + M(J,K)*vi(row,J)*fi(col,K) + enddo + enddo + if (abs(elem) > threshold) then + call bml_set_element(KK0_bml,row,col,elem) + endif + enddo + enddo + deallocate(O,M) + endif enddo - Fel = norm2(IdentRes-Res)/norm2(IdentRes) !! RELATIVE RESIDUAL ERROR ESTIMATE Eq. (48) Ref. [*] - write(*,*) '# I, L, Fel = ',I,L,Fel !! Fel goes down with L - - ! Does not work, need to normalize matrix - if ((Fel > FelTol).AND.(I < (LMAX))) then - deallocate(O, M) - else - do row = 1,Nr_atoms - do col = 1,Nr_atoms - elem = 0.0 - do K = 1,L - do J = 1,L - elem = elem + M(J,K)*vi(row,J)*fi(col,K) - enddo - enddo - if (abs(elem) > threshold) then - call bml_set_element(KK0_bml,row,col,elem) - endif - enddo - enddo - deallocate(O,M) - endif - enddo deallocate(row1); deallocate(row2); end subroutine prg_kernel_matrix_multirank diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index cbc484e7..f2d8d5df 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -56,6 +56,7 @@ progress_test(prg_density_T) progress_test(prg_density_T_fermi) progress_test(prg_sp2_basic) progress_test(prg_implicit_fermi) +progress_test(prg_implicit_fermi_save_inverse) progress_test(prg_sp2_alg1_dense) progress_test(prg_sp2_alg2_dense) progress_test(prg_sp2_alg1_ellpack) diff --git a/tests/src/main.F90 b/tests/src/main.F90 index 995644c0..3ab0354f 100644 --- a/tests/src/main.F90 +++ b/tests/src/main.F90 @@ -32,6 +32,7 @@ program main implicit none integer :: norb, mdim, verbose + type(bml_matrix_t) :: inv_bml(10) type(bml_matrix_t) :: ham_bml type(bml_matrix_t) :: rho_bml, rho1_bml type(bml_matrix_t) :: rho_ortho_bml @@ -59,7 +60,7 @@ program main real(dp) :: mineval, maxeval, occerrlimit real(dp), allocatable :: gbnd(:) integer :: minsp2iter, icount, nodesPerPart, occsteps - integer :: norecs + integer :: norecs,occiter,i integer :: maxsp2iter, npts, sp2all_timer, sp2all_timer_init integer, allocatable :: pp(:), signlist(:) real(dp), allocatable :: vv(:) @@ -95,7 +96,7 @@ program main !The following Hamiltonian belongs to a water box structure !which was precalculated with dftb+ -! call h_read(ham,nOrb) + ! call h_read(ham,nOrb) !Convert the Hamiltonian to bml @@ -108,1006 +109,1041 @@ program main case("prg_density") !Diagonalize H and build \rho - write(*,*) "Testing the construction of the density matrix from density_mod" + write(*,*) "Testing the construction of the density matrix from density_mod" - call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,rho_bml) - call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,ham_bml) + call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,rho_bml) + call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,ham_bml) - call bml_read_matrix(ham_bml,'hamiltonian_ortho.mtx') + call bml_read_matrix(ham_bml,'hamiltonian_ortho.mtx') - call prg_build_density_T0(ham_bml, rho_bml, threshold, bndfil) - call bml_scale(0.5_dp, rho_bml) - call prg_check_idempotency(rho_bml,threshold,idempotency) - write(*,*)"Idempotency for prg_build_density_T0",idempotency - if(idempotency.gt.idempotency_tol)then - write(*,*) "Idempotency is too high", idempotency - error stop - endif + call prg_build_density_T0(ham_bml, rho_bml, threshold, bndfil) + call bml_scale(0.5_dp, rho_bml) + call prg_check_idempotency(rho_bml,threshold,idempotency) + write(*,*)"Idempotency for prg_build_density_T0",idempotency + if(idempotency.gt.idempotency_tol)then + write(*,*) "Idempotency is too high", idempotency + error stop + endif case("prg_density_T") !Diagonalize H and build \rho with electronic temperature KbT - write(*,*) "Testing the construction of the density at KbT > 0 matrix from density_mod" - call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,rho_bml) - call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,ham_bml) + write(*,*) "Testing the construction of the density at KbT > 0 matrix from density_mod" + call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,rho_bml) + call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,ham_bml) - call bml_read_matrix(ham_bml,'hamiltonian_ortho.mtx') + call bml_read_matrix(ham_bml,'hamiltonian_ortho.mtx') - call prg_build_density_T(ham_bml, rho_bml, threshold, bndfil, 0.01_dp, mu) - call bml_scale(0.5_dp, rho_bml) - call prg_check_idempotency(rho_bml,threshold,idempotency) - write(*,*)"Idempotency for prg_build_density_T0",idempotency - write(*,*)"Fermi level:",mu - if(idempotency.gt.1.0D-5)then - write(*,*) "Idempotency is too high", idempotency - error stop - endif + call prg_build_density_T(ham_bml, rho_bml, threshold, bndfil, 0.01_dp, mu) + call bml_scale(0.5_dp, rho_bml) + call prg_check_idempotency(rho_bml,threshold,idempotency) + write(*,*)"Idempotency for prg_build_density_T0",idempotency + write(*,*)"Fermi level:",mu + if(idempotency.gt.1.0D-5)then + write(*,*) "Idempotency is too high", idempotency + error stop + endif case("prg_density_T_fermi") !Diagonalize H and build \rho with electronic temperature KbT and with chemical potential mu - write(*,*) "Testing the construction of the density matrix at KbT > 0 and at mu = Ef from density_mod" - call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,rho_bml) - call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,ham_bml) + write(*,*) "Testing the construction of the density matrix at KbT > 0 and at mu = Ef from density_mod" + call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,rho_bml) + call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,ham_bml) - call bml_read_matrix(ham_bml,'hamiltonian_ortho.mtx') + call bml_read_matrix(ham_bml,'hamiltonian_ortho.mtx') - call prg_build_density_T_Fermi(ham_bml, rho_bml, threshold,0.01_dp, -0.10682896819759_dp, 1) - call bml_scale(0.5_dp, rho_bml) - call prg_check_idempotency(rho_bml,threshold,idempotency) - write(*,*)"Idempotency for prg_build_density_T0",idempotency - write(*,*)"Fermi level:",mu - if(idempotency.gt.1.0D-5)then - write(*,*) "Idempotency is too high", idempotency - error stop - endif + call prg_build_density_T_Fermi(ham_bml, rho_bml, threshold,0.01_dp, -0.10682896819759_dp, 1) + call bml_scale(0.5_dp, rho_bml) + call prg_check_idempotency(rho_bml,threshold,idempotency) + write(*,*)"Idempotency for prg_build_density_T0",idempotency + write(*,*)"Fermi level:",mu + if(idempotency.gt.1.0D-5)then + write(*,*) "Idempotency is too high", idempotency + error stop + endif case("prg_density_cheb_fermi") !Use Chebyshev expansion to build the density matrix - write(*,*) "Testing the construction of the density matrix at KbT > 0 and at mu = Ef from chebyshev_mod" + write(*,*) "Testing the construction of the density matrix at KbT > 0 and at mu = Ef from chebyshev_mod" - call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,rho_bml) - call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,ham_bml) + call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,rho_bml) + call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,ham_bml) - call bml_read_matrix(ham_bml,'hamiltonian_ortho.mtx') + call bml_read_matrix(ham_bml,'hamiltonian_ortho.mtx') - mu=1.0_dp - write(*,*)"mu",mu - call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,rho1_bml) - call prg_build_density_T_fermi(ham_bml, rho_bml, threshold,0.01_dp, -0.10682896819759_dp, 0) - call prg_build_density_cheb_fermi(ham_bml,rho1_bml,1.0_dp,& - threshold,200,0.01_dp,mu,bndfil,.true.,0.001_dp,& - .true.,2000,.false.,1) - call bml_add_deprecated(1.0_dp,rho1_bml,-1.0_dp,rho_bml,0.0_dp) - error_calc = bml_fnorm(rho1_bml) - if(error_calc.gt.0.1_dp)then - write(*,*) "Error in Chebyshev expansion","Error = ",error_calc - error stop - endif + mu=1.0_dp + write(*,*)"mu",mu + call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,rho1_bml) + call prg_build_density_T_fermi(ham_bml, rho_bml, threshold,0.01_dp, -0.10682896819759_dp, 0) + call prg_build_density_cheb_fermi(ham_bml,rho1_bml,1.0_dp,& + threshold,200,0.01_dp,mu,bndfil,.true.,0.001_dp,& + .true.,2000,.false.,1) + call bml_add_deprecated(1.0_dp,rho1_bml,-1.0_dp,rho_bml,0.0_dp) + error_calc = bml_fnorm(rho1_bml) + if(error_calc.gt.0.1_dp)then + write(*,*) "Error in Chebyshev expansion","Error = ",error_calc + error stop + endif case("prg_implicit_fermi") !Use implicit recursive expansion by Niklasson to build density matrix - write(*,*) "Testing the construction of the density matrix at KbT > 0 and at mu = Ef from implicit_fermi_mod" - - mu = 0.2_dp - beta = 4.0_dp !nocc,osteps,occerrlimit + write(*,*) "Testing the construction of the density matrix at KbT > 0 and at mu = Ef from implicit_fermi_mod" - call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,rho_bml) - call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,ham_bml) + mu = 0.2_dp + beta = 4.0_dp !nocc,osteps,occerrlimit + norecs = 10 + bml_type = 'ellpack' - call bml_read_matrix(ham_bml,'hamiltonian_ortho.mtx') + call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,rho_bml) + call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,ham_bml) - call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,rho1_bml) - call prg_implicit_fermi(ham_bml, rho1_bml, 10, 2, 10.0_dp, mu, beta, 0, 1, 1.0_dp, threshold, 10e-8_dp) + call bml_read_matrix(ham_bml,'hamiltonian_ortho.mtx') - mu = 0.2_dp - - call prg_test_density_matrix(ham_bml, rho_bml, beta, mu, 10.0_dp, 1, 1.0_dp, threshold) + call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,rho1_bml) + call prg_implicit_fermi(ham_bml, rho1_bml, norecs, 2, 10.0_dp, mu, beta, 0, 1, 1.0_dp, threshold, 1e-6_dp) - call bml_add(rho1_bml,rho_bml,1.0_dp,-1.0_dp,threshold) + mu = 0.2_dp - error_calc = bml_fnorm(rho1_bml) - if(error_calc.gt.0.1_dp)then - write(*,*) "Error in Implicit Fermi expansion ","Error = ",error_calc - error stop - endif + call prg_test_density_matrix(ham_bml, rho_bml, beta, mu, 10.0_dp, 1, 1.0_dp, threshold) + + call bml_add(rho1_bml,rho_bml,1.0_dp,-1.0_dp,threshold) + + error_calc = bml_fnorm(rho1_bml) + write(*,*) error_calc + if(error_calc.gt.0.1_dp)then + write(*,*) "Error in Implicit Fermi expansion ","Error = ",error_calc + error stop + endif + + case("prg_implicit_fermi_save_inverse") + + mu = 0.2_dp + beta = 4.0_dp !nocc,osteps,occerrlimit + norecs = 10 + nocc = 10.0_dp + + do i = 1,norecs + call bml_identity_matrix(bml_type,bml_element_real,dp,norb,norb,inv_bml(i)) + enddo + + call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,rho_bml) + call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,ham_bml) + + call bml_read_matrix(ham_bml,'hamiltonian_ortho.mtx') + + call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,rho1_bml) + call prg_test_density_matrix(ham_bml,rho1_bml,beta,mu,nocc,1,1e-4_dp,threshold) + write(*,*) mu + mu = 0.2_dp + call prg_implicit_fermi_save_inverse(inv_bml,ham_bml,rho_bml,norecs,nocc,mu,beta,1e-4_dp, threshold, 1e-5_dp, 1,occiter) + + write(*,*) mu + call bml_scale(0.5_dp,rho_bml) + call bml_add(rho1_bml,rho_bml,1.0_dp,-1.0_dp,threshold) + + error_calc = bml_fnorm(rho1_bml) + if(error_calc.gt.0.1_dp)then + write(*,*) "Error in Implicit Fermi expansion save inverse","Error = ",error_calc + error stop + endif case("prg_sp2_basic") !Sp2 original version - call prg_timer_start(loop_timer) + call prg_timer_start(loop_timer) - call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,rho_bml) - call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,ham_bml) + call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,rho_bml) + call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,ham_bml) - call bml_read_matrix(ham_bml,'hamiltonian_ortho.mtx') + call bml_read_matrix(ham_bml,'hamiltonian_ortho.mtx') - call prg_timer_start(sp2_timer) - call prg_sp2_basic(ham_bml,rho_bml,threshold,bndfil,minsp2iter,maxsp2iter & - ,sp2conv,sp2tol,verbose) - call prg_timer_stop(sp2_timer) - call bml_scale(0.5_dp, rho_bml) - call prg_check_idempotency(rho_bml,threshold,idempotency) - if(idempotency.gt.idempotency_tol)then - write(*,*) "Idempotency is too high", idempotency - error stop - endif + call prg_timer_start(sp2_timer) + call prg_sp2_basic(ham_bml,rho_bml,threshold,bndfil,minsp2iter,maxsp2iter & + ,sp2conv,sp2tol,verbose) + call prg_timer_stop(sp2_timer) + call bml_scale(0.5_dp, rho_bml) + call prg_check_idempotency(rho_bml,threshold,idempotency) + if(idempotency.gt.idempotency_tol)then + write(*,*) "Idempotency is too high", idempotency + error stop + endif - call prg_timer_stop(loop_timer) + call prg_timer_stop(loop_timer) case("prg_sp2_alg1_dense") !Sp2 algorithm 1 - call prg_timer_start(loop_timer) + call prg_timer_start(loop_timer) - bml_type = "dense" + bml_type = "dense" - call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,rho_bml) - call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,ham_bml) + call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,rho_bml) + call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,ham_bml) - call bml_read_matrix(ham_bml,'hamiltonian_ortho.mtx') + call bml_read_matrix(ham_bml,'hamiltonian_ortho.mtx') - call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,rho_bml) + call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,rho_bml) - call prg_timer_start(sp2_timer) - call prg_sp2_alg1(ham_bml,rho_bml,threshold,bndfil,minsp2iter,maxsp2iter & - ,sp2conv,sp2tol) - call prg_timer_stop(sp2_timer) + call prg_timer_start(sp2_timer) + call prg_sp2_alg1(ham_bml,rho_bml,threshold,bndfil,minsp2iter,maxsp2iter & + ,sp2conv,sp2tol) + call prg_timer_stop(sp2_timer) - call bml_scale(0.5_dp, rho_bml) - call prg_check_idempotency(rho_bml,threshold,idempotency) - if(idempotency.gt.idempotency_tol)then - write(*,*) "Idempotency is too high", idempotency - error stop - endif + call bml_scale(0.5_dp, rho_bml) + call prg_check_idempotency(rho_bml,threshold,idempotency) + if(idempotency.gt.idempotency_tol)then + write(*,*) "Idempotency is too high", idempotency + error stop + endif - call prg_timer_stop(loop_timer) + call prg_timer_stop(loop_timer) case("prg_sp2_alg2_dense") !Sp2 algorithm 2 - call prg_timer_start(loop_timer) + call prg_timer_start(loop_timer) - bml_type = "dense" + bml_type = "dense" - call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,rho_bml) - call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,ham_bml) + call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,rho_bml) + call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,ham_bml) - call bml_read_matrix(ham_bml,'hamiltonian_ortho.mtx') + call bml_read_matrix(ham_bml,'hamiltonian_ortho.mtx') - call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,rho_bml) + call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,rho_bml) - call prg_timer_start(sp2_timer) - call prg_sp2_alg2(ham_bml,rho_bml,threshold,bndfil,minsp2iter,maxsp2iter & - ,sp2conv,sp2tol) - call prg_timer_stop(sp2_timer) + call prg_timer_start(sp2_timer) + call prg_sp2_alg2(ham_bml,rho_bml,threshold,bndfil,minsp2iter,maxsp2iter & + ,sp2conv,sp2tol) + call prg_timer_stop(sp2_timer) - call bml_scale(0.5_dp, rho_bml) - call prg_check_idempotency(rho_bml,threshold,idempotency) - if(idempotency.gt.idempotency_tol)then - write(*,*) "Idempotency is too high", idempotency - error stop - endif + call bml_scale(0.5_dp, rho_bml) + call prg_check_idempotency(rho_bml,threshold,idempotency) + if(idempotency.gt.idempotency_tol)then + write(*,*) "Idempotency is too high", idempotency + error stop + endif - call prg_timer_stop(loop_timer) + call prg_timer_stop(loop_timer) case("prg_sp2_alg1_ellpack") !Sp2 algorithm 1 - call prg_timer_start(loop_timer) + call prg_timer_start(loop_timer) - idempotency_tol = 1d-6 - bml_type = "ellpack" - bndfil = 0.5_dp - norb = 6144 - mdim = 600 - threshold = 1.0d-9 - sp2tol = 1.0d-10 + idempotency_tol = 1d-6 + bml_type = "ellpack" + bndfil = 0.5_dp + norb = 6144 + mdim = 600 + threshold = 1.0d-9 + sp2tol = 1.0d-10 - call bml_zero_matrix(bml_type,bml_element_real,dp,norb,mdim,ham_bml) - call bml_zero_matrix(bml_type,bml_element_real,dp,norb,mdim,rho_bml) - call bml_read_matrix(ham_bml, "poly.512.mtx") + call bml_zero_matrix(bml_type,bml_element_real,dp,norb,mdim,ham_bml) + call bml_zero_matrix(bml_type,bml_element_real,dp,norb,mdim,rho_bml) + call bml_read_matrix(ham_bml, "poly.512.mtx") - call prg_timer_start(sp2_timer) - call prg_sp2_alg1(ham_bml,rho_bml,threshold,bndfil,minsp2iter,maxsp2iter & - ,sp2conv,sp2tol) - call prg_timer_stop(sp2_timer) + call prg_timer_start(sp2_timer) + call prg_sp2_alg1(ham_bml,rho_bml,threshold,bndfil,minsp2iter,maxsp2iter & + ,sp2conv,sp2tol) + call prg_timer_stop(sp2_timer) - call bml_scale(0.5_dp, rho_bml) - call prg_check_idempotency(rho_bml,threshold,idempotency) - if(idempotency.gt.idempotency_tol)then - write(*,*) "Idempotency is too high", idempotency - error stop - endif + call bml_scale(0.5_dp, rho_bml) + call prg_check_idempotency(rho_bml,threshold,idempotency) + if(idempotency.gt.idempotency_tol)then + write(*,*) "Idempotency is too high", idempotency + error stop + endif - call prg_timer_stop(loop_timer) + call prg_timer_stop(loop_timer) case("prg_sp2_alg2_ellpack") !Sp2 algorithm 2 ellpack version - call prg_timer_start(loop_timer) + call prg_timer_start(loop_timer) - idempotency_tol = 1d-6 - bml_type = "ellpack" - bndfil = 0.5_dp - norb = 6144 - mdim = 600 - threshold = 1.0d-9 - sp2tol = 1.0d-10 + idempotency_tol = 1d-6 + bml_type = "ellpack" + bndfil = 0.5_dp + norb = 6144 + mdim = 600 + threshold = 1.0d-9 + sp2tol = 1.0d-10 - call bml_zero_matrix(bml_type,bml_element_real,dp,norb,mdim,ham_bml) - call bml_zero_matrix(bml_type,bml_element_real,dp,norb,mdim,rho_bml) - call bml_read_matrix(ham_bml, "poly.512.mtx") + call bml_zero_matrix(bml_type,bml_element_real,dp,norb,mdim,ham_bml) + call bml_zero_matrix(bml_type,bml_element_real,dp,norb,mdim,rho_bml) + call bml_read_matrix(ham_bml, "poly.512.mtx") - call prg_timer_start(sp2_timer) - call prg_sp2_alg2(ham_bml,rho_bml,threshold,bndfil,minsp2iter,maxsp2iter & - ,sp2conv,sp2tol) - call prg_timer_stop(sp2_timer) + call prg_timer_start(sp2_timer) + call prg_sp2_alg2(ham_bml,rho_bml,threshold,bndfil,minsp2iter,maxsp2iter & + ,sp2conv,sp2tol) + call prg_timer_stop(sp2_timer) - call bml_scale(0.5_dp, rho_bml) - call prg_check_idempotency(rho_bml,threshold,idempotency) - if(idempotency.gt.idempotency_tol)then - write(*,*) "Idempotency is too high", idempotency - error stop - endif + call bml_scale(0.5_dp, rho_bml) + call prg_check_idempotency(rho_bml,threshold,idempotency) + if(idempotency.gt.idempotency_tol)then + write(*,*) "Idempotency is too high", idempotency + error stop + endif - call prg_timer_stop(loop_timer) + call prg_timer_stop(loop_timer) case("prg_sp2_alg2_ellpack_poly") !Sp2 algorithm 2 ellpack version - call prg_timer_start(loop_timer) + call prg_timer_start(loop_timer) - idempotency_tol = 1.0d-2 - bml_type = "ellpack" - bndfil = 0.5_dp - norb = 6144 - mdim = 288 - threshold = 1.0d-5 - sp2tol = 1.0d-10 + idempotency_tol = 1.0d-2 + bml_type = "ellpack" + bndfil = 0.5_dp + norb = 6144 + mdim = 288 + threshold = 1.0d-5 + sp2tol = 1.0d-10 - call bml_zero_matrix(bml_type,bml_element_real,dp,norb,mdim,ham_bml) - call bml_zero_matrix(bml_type,bml_element_real,dp,norb,mdim,rho_bml) - call bml_read_matrix(ham_bml, "poly.512.mtx") + call bml_zero_matrix(bml_type,bml_element_real,dp,norb,mdim,ham_bml) + call bml_zero_matrix(bml_type,bml_element_real,dp,norb,mdim,rho_bml) + call bml_read_matrix(ham_bml, "poly.512.mtx") - call prg_timer_start(sp2_timer) - call prg_sp2_alg2(ham_bml,rho_bml,threshold,bndfil,minsp2iter,maxsp2iter & - ,sp2conv,sp2tol) - call prg_timer_stop(sp2_timer) + call prg_timer_start(sp2_timer) + call prg_sp2_alg2(ham_bml,rho_bml,threshold,bndfil,minsp2iter,maxsp2iter & + ,sp2conv,sp2tol) + call prg_timer_stop(sp2_timer) - call bml_scale(0.5_dp, rho_bml) - call prg_check_idempotency(rho_bml, threshold, idempotency) - if(idempotency.gt.idempotency_tol)then - write(*,*) "Idempotency is too high", idempotency - error stop - endif + call bml_scale(0.5_dp, rho_bml) + call prg_check_idempotency(rho_bml, threshold, idempotency) + if(idempotency.gt.idempotency_tol)then + write(*,*) "Idempotency is too high", idempotency + error stop + endif - call prg_timer_stop(loop_timer) + call prg_timer_stop(loop_timer) case("prg_sp2_alg1_seq_dense") !Sp2 algorithm 1 sequence version - call prg_timer_start(loop_timer) + call prg_timer_start(loop_timer) - bml_type = "dense" + bml_type = "dense" - call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,rho_bml) - call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,ham_bml) + call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,rho_bml) + call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,ham_bml) - call bml_read_matrix(ham_bml,'hamiltonian_ortho.mtx') + call bml_read_matrix(ham_bml,'hamiltonian_ortho.mtx') - call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,rho_bml) + call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,rho_bml) - allocate(pp(100),vv(100)) - icount = 0 + allocate(pp(100),vv(100)) + icount = 0 - call prg_timer_start(sp2_timer) - call prg_sp2_alg1_genseq(ham_bml, rho_bml, threshold, bndfil, & - minsp2iter, maxsp2iter, sp2conv, sp2tol, & - pp, icount, vv) + call prg_timer_start(sp2_timer) + call prg_sp2_alg1_genseq(ham_bml, rho_bml, threshold, bndfil, & + minsp2iter, maxsp2iter, sp2conv, sp2tol, & + pp, icount, vv) - call prg_sp2_alg1_seq(ham_bml, rho_bml, threshold, pp, icount, vv) - call prg_timer_stop(sp2_timer) + call prg_sp2_alg1_seq(ham_bml, rho_bml, threshold, pp, icount, vv) + call prg_timer_stop(sp2_timer) - deallocate(pp, vv) + deallocate(pp, vv) - call bml_scale(0.5_dp, rho_bml) - call prg_check_idempotency(rho_bml,threshold,idempotency) - if(idempotency.gt.idempotency_tol)then - write(*,*) "Idempotency is too high", idempotency - error stop - endif + call bml_scale(0.5_dp, rho_bml) + call prg_check_idempotency(rho_bml,threshold,idempotency) + if(idempotency.gt.idempotency_tol)then + write(*,*) "Idempotency is too high", idempotency + error stop + endif - call prg_timer_stop(loop_timer) + call prg_timer_stop(loop_timer) case("prg_sp2_alg2_seq_dense") !Sp2 algorithm 2 sequence version - call prg_timer_start(loop_timer) + call prg_timer_start(loop_timer) - bml_type = "dense" + bml_type = "dense" - call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,rho_bml) - call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,ham_bml) + call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,rho_bml) + call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,ham_bml) - call bml_read_matrix(ham_bml,'hamiltonian_ortho.mtx') + call bml_read_matrix(ham_bml,'hamiltonian_ortho.mtx') - call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,rho_bml) + call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,rho_bml) - allocate(pp(100),vv(100)) - icount = 0 + allocate(pp(100),vv(100)) + icount = 0 - call prg_timer_start(sp2_timer) - call prg_sp2_alg2_genseq(ham_bml, rho_bml, threshold, bndfil, & - minsp2iter, maxsp2iter, sp2conv, sp2tol, & - pp, icount, vv) + call prg_timer_start(sp2_timer) + call prg_sp2_alg2_genseq(ham_bml, rho_bml, threshold, bndfil, & + minsp2iter, maxsp2iter, sp2conv, sp2tol, & + pp, icount, vv) - call prg_sp2_alg2_seq(ham_bml, rho_bml, threshold, pp, icount, vv) - call prg_timer_stop(sp2_timer) + call prg_sp2_alg2_seq(ham_bml, rho_bml, threshold, pp, icount, vv) + call prg_timer_stop(sp2_timer) - deallocate(pp, vv) + deallocate(pp, vv) - call bml_scale(0.5_dp, rho_bml) - call prg_check_idempotency(rho_bml,threshold,idempotency) - if(idempotency.gt.idempotency_tol)then - write(*,*) "Idempotency is too high", idempotency - error stop - endif + call bml_scale(0.5_dp, rho_bml) + call prg_check_idempotency(rho_bml,threshold,idempotency) + if(idempotency.gt.idempotency_tol)then + write(*,*) "Idempotency is too high", idempotency + error stop + endif - call prg_timer_stop(loop_timer) + call prg_timer_stop(loop_timer) case("prg_sp2_alg1_seq_ellpack") !Sp2 algorithm 1 sequence version - call prg_timer_start(loop_timer) + call prg_timer_start(loop_timer) - idempotency_tol = 1d-6 - bml_type = "ellpack" - bndfil = 0.5_dp - norb = 6144 - mdim = 600 - threshold = 1.0d-9 - sp2tol = 1.0d-10 + idempotency_tol = 1d-6 + bml_type = "ellpack" + bndfil = 0.5_dp + norb = 6144 + mdim = 600 + threshold = 1.0d-9 + sp2tol = 1.0d-10 - call bml_zero_matrix(bml_type,bml_element_real,dp,norb,mdim,ham_bml) - call bml_zero_matrix(bml_type,bml_element_real,dp,norb,mdim,rho_bml) - call bml_read_matrix(ham_bml, "poly.512.mtx") + call bml_zero_matrix(bml_type,bml_element_real,dp,norb,mdim,ham_bml) + call bml_zero_matrix(bml_type,bml_element_real,dp,norb,mdim,rho_bml) + call bml_read_matrix(ham_bml, "poly.512.mtx") - allocate(pp(100),vv(100)) - icount = 0 + allocate(pp(100),vv(100)) + icount = 0 - call prg_timer_start(sp2_timer) - call prg_sp2_alg1_genseq(ham_bml, rho_bml, threshold, bndfil, & - minsp2iter, maxsp2iter, sp2conv, sp2tol, & - pp, icount, vv) + call prg_timer_start(sp2_timer) + call prg_sp2_alg1_genseq(ham_bml, rho_bml, threshold, bndfil, & + minsp2iter, maxsp2iter, sp2conv, sp2tol, & + pp, icount, vv) - call prg_sp2_alg1_seq(ham_bml, rho_bml, threshold, pp, icount, vv) - call prg_timer_stop(sp2_timer) + call prg_sp2_alg1_seq(ham_bml, rho_bml, threshold, pp, icount, vv) + call prg_timer_stop(sp2_timer) - deallocate(pp, vv) + deallocate(pp, vv) - call bml_scale(0.5_dp, rho_bml) - call prg_check_idempotency(rho_bml,threshold,idempotency) - if(idempotency.gt.idempotency_tol)then - write(*,*) "Idempotency is too high", idempotency - error stop - endif + call bml_scale(0.5_dp, rho_bml) + call prg_check_idempotency(rho_bml,threshold,idempotency) + if(idempotency.gt.idempotency_tol)then + write(*,*) "Idempotency is too high", idempotency + error stop + endif - call prg_timer_stop(loop_timer) + call prg_timer_stop(loop_timer) case("prg_sp2_alg2_seq_ellpack") !Sp2 algorithm 2 sequence version - call prg_timer_start(loop_timer) + call prg_timer_start(loop_timer) - idempotency_tol = 1d-6 - bml_type = "ellpack" - bndfil = 0.5_dp - norb = 6144 - mdim = 600 - threshold = 1.0d-9 - sp2tol = 1.0d-10 + idempotency_tol = 1d-6 + bml_type = "ellpack" + bndfil = 0.5_dp + norb = 6144 + mdim = 600 + threshold = 1.0d-9 + sp2tol = 1.0d-10 - call bml_zero_matrix(bml_type,bml_element_real,dp,norb,mdim,ham_bml) - call bml_zero_matrix(bml_type,bml_element_real,dp,norb,mdim,rho_bml) - call bml_read_matrix(ham_bml, "poly.512.mtx") + call bml_zero_matrix(bml_type,bml_element_real,dp,norb,mdim,ham_bml) + call bml_zero_matrix(bml_type,bml_element_real,dp,norb,mdim,rho_bml) + call bml_read_matrix(ham_bml, "poly.512.mtx") - allocate(pp(100),vv(100)) - icount = 0 + allocate(pp(100),vv(100)) + icount = 0 - call prg_timer_start(sp2_timer) - call prg_sp2_alg2_genseq(ham_bml, rho_bml, threshold, bndfil, & - minsp2iter, maxsp2iter, sp2conv, sp2tol, & - pp, icount, vv) + call prg_timer_start(sp2_timer) + call prg_sp2_alg2_genseq(ham_bml, rho_bml, threshold, bndfil, & + minsp2iter, maxsp2iter, sp2conv, sp2tol, & + pp, icount, vv) - call prg_sp2_alg2_seq(ham_bml, rho_bml, threshold, pp, icount, vv) - call prg_timer_stop(sp2_timer) + call prg_sp2_alg2_seq(ham_bml, rho_bml, threshold, pp, icount, vv) + call prg_timer_stop(sp2_timer) - deallocate(pp, vv) + deallocate(pp, vv) - call bml_scale(0.5_dp, rho_bml) - call prg_check_idempotency(rho_bml,threshold,idempotency) - if(idempotency.gt.idempotency_tol)then - write(*,*) "Idempotency is too high", idempotency - error stop - endif + call bml_scale(0.5_dp, rho_bml) + call prg_check_idempotency(rho_bml,threshold,idempotency) + if(idempotency.gt.idempotency_tol)then + write(*,*) "Idempotency is too high", idempotency + error stop + endif - call prg_timer_stop(loop_timer) + call prg_timer_stop(loop_timer) case("prg_sp2_alg1_seq_inplace_dense") !SP2 algorithm 1 seq version in place - call prg_timer_start(loop_timer) + call prg_timer_start(loop_timer) - bml_type = "dense" + bml_type = "dense" - call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,rho_bml) - call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,ham_bml) + call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,rho_bml) + call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,ham_bml) - call bml_read_matrix(ham_bml,'hamiltonian_ortho.mtx') + call bml_read_matrix(ham_bml,'hamiltonian_ortho.mtx') - allocate(pp(100),vv(100), gbnd(2)) - icount = 0 + allocate(pp(100),vv(100), gbnd(2)) + icount = 0 - call prg_timer_start(sp2_timer) - call prg_sp2_alg2_genseq(ham_bml, rho_bml, threshold, bndfil, & - minsp2iter, maxsp2iter, sp2conv, sp2tol, & - pp, icount, vv) - call prg_timer_stop(sp2_timer) + call prg_timer_start(sp2_timer) + call prg_sp2_alg2_genseq(ham_bml, rho_bml, threshold, bndfil, & + minsp2iter, maxsp2iter, sp2conv, sp2tol, & + pp, icount, vv) + call prg_timer_stop(sp2_timer) - call bml_copy(ham_bml, rho_bml) - call bml_gershgorin(rho_bml, gbnd) + call bml_copy(ham_bml, rho_bml) + call bml_gershgorin(rho_bml, gbnd) - call prg_timer_start(sp2_timer) - call prg_prg_sp2_alg1_seq_inplace(rho_bml, threshold, pp, icount, & - vv, gbnd(1), gbnd(2)) - call prg_timer_stop(sp2_timer) + call prg_timer_start(sp2_timer) + call prg_prg_sp2_alg1_seq_inplace(rho_bml, threshold, pp, icount, & + vv, gbnd(1), gbnd(2)) + call prg_timer_stop(sp2_timer) - deallocate(pp, vv, gbnd) + deallocate(pp, vv, gbnd) - call bml_scale(0.5_dp, rho_bml) - call prg_check_idempotency(rho_bml,threshold,idempotency) - if(idempotency.gt.idempotency_tol)then - write(*,*) "Idempotency is too high", idempotency - error stop - endif + call bml_scale(0.5_dp, rho_bml) + call prg_check_idempotency(rho_bml,threshold,idempotency) + if(idempotency.gt.idempotency_tol)then + write(*,*) "Idempotency is too high", idempotency + error stop + endif - call prg_timer_stop(loop_timer) + call prg_timer_stop(loop_timer) case("prg_sp2_alg2_seq_inplace_dense") !SP2 algorithm 2 seq version in place - call prg_timer_start(loop_timer) + call prg_timer_start(loop_timer) - bml_type = "dense" + bml_type = "dense" - call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,rho_bml) - call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,ham_bml) + call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,rho_bml) + call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,ham_bml) - call bml_read_matrix(ham_bml,'hamiltonian_ortho.mtx') + call bml_read_matrix(ham_bml,'hamiltonian_ortho.mtx') - allocate(pp(100),vv(100), gbnd(2)) - icount = 0 + allocate(pp(100),vv(100), gbnd(2)) + icount = 0 - call prg_timer_start(sp2_timer) + call prg_timer_start(sp2_timer) - call prg_sp2_alg2_genseq(ham_bml, rho_bml, threshold, bndfil, & - minsp2iter, maxsp2iter, sp2conv, sp2tol, & - pp, icount, vv) - call prg_timer_stop(sp2_timer) + call prg_sp2_alg2_genseq(ham_bml, rho_bml, threshold, bndfil, & + minsp2iter, maxsp2iter, sp2conv, sp2tol, & + pp, icount, vv) + call prg_timer_stop(sp2_timer) - call bml_copy(ham_bml, rho_bml) - call bml_gershgorin(rho_bml, gbnd) + call bml_copy(ham_bml, rho_bml) + call bml_gershgorin(rho_bml, gbnd) - call prg_timer_start(sp2_timer) - call prg_prg_sp2_alg2_seq_inplace(rho_bml, threshold, pp, icount, & - vv, gbnd(1), gbnd(2)) - call prg_timer_stop(sp2_timer) + call prg_timer_start(sp2_timer) + call prg_prg_sp2_alg2_seq_inplace(rho_bml, threshold, pp, icount, & + vv, gbnd(1), gbnd(2)) + call prg_timer_stop(sp2_timer) - deallocate(pp, vv, gbnd) + deallocate(pp, vv, gbnd) - call bml_scale(0.5_dp, rho_bml) - call prg_check_idempotency(rho_bml,threshold,idempotency) - if(idempotency.gt.idempotency_tol)then - write(*,*) "Idempotency is too high", idempotency - error stop - endif + call bml_scale(0.5_dp, rho_bml) + call prg_check_idempotency(rho_bml,threshold,idempotency) + if(idempotency.gt.idempotency_tol)then + write(*,*) "Idempotency is too high", idempotency + error stop + endif - call prg_timer_stop(loop_timer) + call prg_timer_stop(loop_timer) case("prg_sp2_alg1_seq_inplace_ellpack") !SP2 algorithm 1 seq version in place - call prg_timer_start(loop_timer) + call prg_timer_start(loop_timer) - idempotency_tol = 1d-6 - bml_type = "ellpack" - bndfil = 0.5_dp - norb = 6144 - mdim = 600 - threshold = 1.0d-9 - sp2tol = 1.0d-10 + idempotency_tol = 1d-6 + bml_type = "ellpack" + bndfil = 0.5_dp + norb = 6144 + mdim = 600 + threshold = 1.0d-9 + sp2tol = 1.0d-10 - call bml_zero_matrix(bml_type,bml_element_real,dp,norb,mdim,ham_bml) - call bml_zero_matrix(bml_type,bml_element_real,dp,norb,mdim,rho_bml) - call bml_read_matrix(ham_bml, "poly.512.mtx") + call bml_zero_matrix(bml_type,bml_element_real,dp,norb,mdim,ham_bml) + call bml_zero_matrix(bml_type,bml_element_real,dp,norb,mdim,rho_bml) + call bml_read_matrix(ham_bml, "poly.512.mtx") - allocate(pp(100),vv(100), gbnd(2)) - icount = 0 + allocate(pp(100),vv(100), gbnd(2)) + icount = 0 - call prg_timer_start(sp2_timer) - call prg_sp2_alg2_genseq(ham_bml, rho_bml, threshold, bndfil, & - minsp2iter, maxsp2iter, sp2conv, sp2tol, & - pp, icount, vv) - call prg_timer_stop(sp2_timer) + call prg_timer_start(sp2_timer) + call prg_sp2_alg2_genseq(ham_bml, rho_bml, threshold, bndfil, & + minsp2iter, maxsp2iter, sp2conv, sp2tol, & + pp, icount, vv) + call prg_timer_stop(sp2_timer) - call bml_copy(ham_bml, rho_bml) - call bml_gershgorin(rho_bml, gbnd) + call bml_copy(ham_bml, rho_bml) + call bml_gershgorin(rho_bml, gbnd) - call prg_timer_start(sp2_timer) - call prg_prg_sp2_alg1_seq_inplace(rho_bml, threshold, pp, icount, & - vv, gbnd(1), gbnd(2)) - call prg_timer_stop(sp2_timer) + call prg_timer_start(sp2_timer) + call prg_prg_sp2_alg1_seq_inplace(rho_bml, threshold, pp, icount, & + vv, gbnd(1), gbnd(2)) + call prg_timer_stop(sp2_timer) - deallocate(pp, vv, gbnd) + deallocate(pp, vv, gbnd) - call bml_scale(0.5_dp, rho_bml) - call prg_check_idempotency(rho_bml,threshold,idempotency) - if(idempotency.gt.idempotency_tol)then - write(*,*) "Idempotency is too high", idempotency - error stop - endif + call bml_scale(0.5_dp, rho_bml) + call prg_check_idempotency(rho_bml,threshold,idempotency) + if(idempotency.gt.idempotency_tol)then + write(*,*) "Idempotency is too high", idempotency + error stop + endif - call prg_timer_stop(loop_timer) + call prg_timer_stop(loop_timer) case("prg_sp2_alg2_seq_inplace_ellpack") !SP2 algorithm 2 seq version in place - call prg_timer_start(loop_timer) + call prg_timer_start(loop_timer) - idempotency_tol = 1d-6 - bml_type = "ellpack" - bndfil = 0.5_dp - norb = 6144 - mdim = 600 - threshold = 1.0d-9 - sp2tol = 1.0d-10 + idempotency_tol = 1d-6 + bml_type = "ellpack" + bndfil = 0.5_dp + norb = 6144 + mdim = 600 + threshold = 1.0d-9 + sp2tol = 1.0d-10 - call bml_zero_matrix(bml_type,bml_element_real,dp,norb,mdim,ham_bml) - call bml_zero_matrix(bml_type,bml_element_real,dp,norb,mdim,rho_bml) - call bml_read_matrix(ham_bml, "poly.512.mtx") + call bml_zero_matrix(bml_type,bml_element_real,dp,norb,mdim,ham_bml) + call bml_zero_matrix(bml_type,bml_element_real,dp,norb,mdim,rho_bml) + call bml_read_matrix(ham_bml, "poly.512.mtx") - allocate(pp(100),vv(100), gbnd(2)) - icount = 0 + allocate(pp(100),vv(100), gbnd(2)) + icount = 0 - call prg_timer_start(sp2_timer) - call prg_sp2_alg2_genseq(ham_bml, rho_bml, threshold, bndfil, & - minsp2iter, maxsp2iter, sp2conv, sp2tol, & - pp, icount, vv) - call prg_timer_stop(sp2_timer) + call prg_timer_start(sp2_timer) + call prg_sp2_alg2_genseq(ham_bml, rho_bml, threshold, bndfil, & + minsp2iter, maxsp2iter, sp2conv, sp2tol, & + pp, icount, vv) + call prg_timer_stop(sp2_timer) - call bml_copy(ham_bml, rho_bml) - call bml_gershgorin(rho_bml, gbnd) + call bml_copy(ham_bml, rho_bml) + call bml_gershgorin(rho_bml, gbnd) - call prg_timer_start(sp2_timer) - call prg_prg_sp2_alg2_seq_inplace(rho_bml, threshold, pp, icount, & - vv, gbnd(1), gbnd(2)) - call prg_timer_stop(sp2_timer) + call prg_timer_start(sp2_timer) + call prg_prg_sp2_alg2_seq_inplace(rho_bml, threshold, pp, icount, & + vv, gbnd(1), gbnd(2)) + call prg_timer_stop(sp2_timer) - deallocate(pp, vv, gbnd) + deallocate(pp, vv, gbnd) - call bml_scale(0.5_dp, rho_bml) - call prg_check_idempotency(rho_bml,threshold,idempotency) - if(idempotency.gt.idempotency_tol)then - write(*,*) "Idempotency is too high", idempotency - error stop - endif + call bml_scale(0.5_dp, rho_bml) + call prg_check_idempotency(rho_bml,threshold,idempotency) + if(idempotency.gt.idempotency_tol)then + write(*,*) "Idempotency is too high", idempotency + error stop + endif case("prg_sp2_fermi_dense") !SP2 algorithm 2 seq version in place - bml_type = "dense" - threshold = 1.0d-9 - mdim = -1 - verbose = 1 - norecs = 20 - nocc = bndfil*norb - tscale = 1.0_dp - occerrlimit = 1.0D-9 - tracelimit = 1.0D-12 - mu = 0.0_dp - beta0 = 20.1_dp - mineval = 0.0_dp - maxeval = 0.0_dp - eps = 1.0D-4 - occsteps = 0 - sp2all_timer = 2 - sp2all_timer_init = 3 - - allocate(signlist(30)) - signlist = 0 - - call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,rho_bml) - call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,ham_bml) - - call bml_read_matrix(ham_bml,'hamiltonian_ortho.mtx') - - call bml_zero_matrix(bml_type, bml_element_real, dp, norb, norb, orthox_bml) - - call prg_timer_start(sp2all_timer_init,"SP2INIT") - call prg_sp2_fermi_init_norecs(ham_bml, norecs, nocc, tscale,& - & threshold, occerrlimit, tracelimit, orthox_bml, mu,& - & beta0, mineval, maxeval, signlist, 10) - call prg_sp2_fermi_init(ham_bml, norecs, nocc, tscale,& - & threshold, occerrlimit, tracelimit, orthox_bml, mu,& - & beta0, mineval, maxeval, signlist) - call prg_timer_stop(sp2all_timer_init,10) - call prg_timer_start(sp2all_timer,"SP2FERMI") - call prg_sp2_fermi(ham_bml, occsteps, norecs,& - & nocc, mu, beta0, mineval, maxeval,& - & signlist, threshold, eps, tracelimit, orthox_bml) - - call prg_timer_stop(SP2ALL_TIMER,10) - - write(*,*)"BETA", beta0 - write(*,*)"ETEMP (eV)", 1.0_dp/beta0 - write(*,*)"NORECS USED", norecs - write(*,*)"CHEMPOT", mu - - kbt = 1.0_dp/beta0 - - call prg_build_density_T_fermi(ham_bml, rho_bml, threshold, kbt, mu, 0) - - call bml_add_deprecated(2.0_dp,orthox_bml,-1.0_dp,rho_bml,0.0_dp) - error_calc = bml_fnorm(orthox_bml) - - if(error_calc.gt.0.01_dp)then - write(*,*) "Error in sp2 Fermi","Error = ",error_calc - error stop - endif + bml_type = "dense" + threshold = 1.0d-9 + mdim = -1 + verbose = 1 + norecs = 20 + nocc = bndfil*norb + tscale = 1.0_dp + occerrlimit = 1.0D-9 + tracelimit = 1.0D-12 + mu = 0.0_dp + beta0 = 20.1_dp + mineval = 0.0_dp + maxeval = 0.0_dp + eps = 1.0D-4 + occsteps = 0 + sp2all_timer = 2 + sp2all_timer_init = 3 + + allocate(signlist(30)) + signlist = 0 + + call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,rho_bml) + call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,ham_bml) + + call bml_read_matrix(ham_bml,'hamiltonian_ortho.mtx') + + call bml_zero_matrix(bml_type, bml_element_real, dp, norb, norb, orthox_bml) + + call prg_timer_start(sp2all_timer_init,"SP2INIT") + call prg_sp2_fermi_init_norecs(ham_bml, norecs, nocc, tscale,& + & threshold, occerrlimit, tracelimit, orthox_bml, mu,& + & beta0, mineval, maxeval, signlist, 10) + call prg_sp2_fermi_init(ham_bml, norecs, nocc, tscale,& + & threshold, occerrlimit, tracelimit, orthox_bml, mu,& + & beta0, mineval, maxeval, signlist) + call prg_timer_stop(sp2all_timer_init,10) + call prg_timer_start(sp2all_timer,"SP2FERMI") + call prg_sp2_fermi(ham_bml, occsteps, norecs,& + & nocc, mu, beta0, mineval, maxeval,& + & signlist, threshold, eps, tracelimit, orthox_bml) + + call prg_timer_stop(SP2ALL_TIMER,10) + + write(*,*)"BETA", beta0 + write(*,*)"ETEMP (eV)", 1.0_dp/beta0 + write(*,*)"NORECS USED", norecs + write(*,*)"CHEMPOT", mu + + kbt = 1.0_dp/beta0 + + call prg_build_density_T_fermi(ham_bml, rho_bml, threshold, kbt, mu, 0) + + call bml_add_deprecated(2.0_dp,orthox_bml,-1.0_dp,rho_bml,0.0_dp) + error_calc = bml_fnorm(orthox_bml) + + if(error_calc.gt.0.01_dp)then + write(*,*) "Error in sp2 Fermi","Error = ",error_calc + error stop + endif case("prg_equal_partition") ! Create equal partitions - call prg_timer_start(loop_timer) + call prg_timer_start(loop_timer) - call prg_timer_start(part_timer) - call prg_equalPartition(gp, 6, 72) - call prg_timer_stop(part_timer) + call prg_timer_start(part_timer) + call prg_equalPartition(gp, 6, 72) + call prg_timer_stop(part_timer) - call prg_printGraphPartitioning(gp) - if (gp%totalParts .ne. 12) then - write(*,*) "Number of parts is wrong ", gp%totalParts - call exit(-1) - endif + call prg_printGraphPartitioning(gp) + if (gp%totalParts .ne. 12) then + write(*,*) "Number of parts is wrong ", gp%totalParts + call exit(-1) + endif - call prg_destroyGraphPartitioning(gp) + call prg_destroyGraphPartitioning(gp) - call prg_timer_start(part_timer) - call prg_equalPartition(gp, 7, 72) - call prg_timer_stop(part_timer) + call prg_timer_start(part_timer) + call prg_equalPartition(gp, 7, 72) + call prg_timer_stop(part_timer) - call prg_printGraphPartitioning(gp) - if (gp%totalParts .ne. 11) then - write(*,*) "Number of parts is wrong ", gp%totalParts - !error stop(-1) - call exit(-1) - endif + call prg_printGraphPartitioning(gp) + if (gp%totalParts .ne. 11) then + write(*,*) "Number of parts is wrong ", gp%totalParts + !error stop(-1) + call exit(-1) + endif - call prg_timer_stop(loop_timer) + call prg_timer_stop(loop_timer) case("prg_file_partition") ! Create partition from a file - call prg_timer_start(loop_timer) + call prg_timer_start(loop_timer) - call prg_timer_start(part_timer) - call prg_filePartition(gp, 'test.part') - call prg_timer_stop(part_timer) + call prg_timer_start(part_timer) + call prg_filePartition(gp, 'test.part') + call prg_timer_stop(part_timer) - call prg_printGraphPartitioning(gp) - if (gp%totalParts .ne. 104) then - write(*,*) "Number of parts is wrong ", gp%totalParts - error stop - endif + call prg_printGraphPartitioning(gp) + if (gp%totalParts .ne. 104) then + write(*,*) "Number of parts is wrong ", gp%totalParts + error stop + endif - call prg_timer_stop(loop_timer) + call prg_timer_stop(loop_timer) case("prg_subgraphsp2_equal") ! Subgraph SP2 using equal size parts - call prg_timer_start(subgraph_timer) + call prg_timer_start(subgraph_timer) - bml_type = "ellpack" - norb = 6144 - mdim = 300 - threshold = 1.0d-5 - bndfil = 0.5_dp - gthreshold = 1.0d-3 - sp2tol = 1.0d-10 - errlimit = 1.0d-12 - nodesPerPart = 48 - idempotency_tol = 1.0d-2 + bml_type = "ellpack" + norb = 6144 + mdim = 300 + threshold = 1.0d-5 + bndfil = 0.5_dp + gthreshold = 1.0d-3 + sp2tol = 1.0d-10 + errlimit = 1.0d-12 + nodesPerPart = 48 + idempotency_tol = 1.0d-2 - call bml_zero_matrix(bml_type,bml_element_real,dp,norb,mdim,ham_bml) - call bml_zero_matrix(bml_type,bml_element_real,dp,norb,mdim,rho_bml) - call bml_read_matrix(ham_bml, "poly.512.mtx") + call bml_zero_matrix(bml_type,bml_element_real,dp,norb,mdim,ham_bml) + call bml_zero_matrix(bml_type,bml_element_real,dp,norb,mdim,rho_bml) + call bml_read_matrix(ham_bml, "poly.512.mtx") - call prg_timer_start(loop_timer) - call test_subgraphloop(ham_bml, rho_bml, threshold, bndfil, & - minsp2iter, maxsp2iter, sp2conv, sp2tol, gthreshold, errlimit, & - nodesPerPart) - call prg_timer_stop(loop_timer) + call prg_timer_start(loop_timer) + call test_subgraphloop(ham_bml, rho_bml, threshold, bndfil, & + minsp2iter, maxsp2iter, sp2conv, sp2tol, gthreshold, errlimit, & + nodesPerPart) + call prg_timer_stop(loop_timer) - call bml_scale(0.5_dp, rho_bml) - call prg_check_idempotency(rho_bml,threshold,idempotency) + call bml_scale(0.5_dp, rho_bml) + call prg_check_idempotency(rho_bml,threshold,idempotency) - call prg_timer_stop(subgraph_timer) + call prg_timer_stop(subgraph_timer) - if(idempotency.gt.idempotency_tol)then - write(*,*) "Idempotency is too high", idempotency - error stop - endif + if(idempotency.gt.idempotency_tol)then + write(*,*) "Idempotency is too high", idempotency + error stop + endif -stop + stop case("prg_deorthogonalize_dense") !Deorthogonalization of the density matrix - call prg_timer_start(loop_timer) + call prg_timer_start(loop_timer) - error_tol = 1.0d-9 + error_tol = 1.0d-9 - call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,rho_bml) - call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,ham_bml) + call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,rho_bml) + call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,ham_bml) - call bml_read_matrix(ham_bml,'hamiltonian_ortho.mtx') + call bml_read_matrix(ham_bml,'hamiltonian_ortho.mtx') - call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,rho_bml) - call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,rho_ortho_bml) - call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,zmat_bml) - call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,aux_bml) + call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,rho_bml) + call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,rho_ortho_bml) + call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,zmat_bml) + call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,aux_bml) - call bml_read_matrix(zmat_bml,'zmatrix.mtx') - call bml_read_matrix(rho_bml,'density.mtx') - call bml_read_matrix(rho_ortho_bml,'density_ortho.mtx') + call bml_read_matrix(zmat_bml,'zmatrix.mtx') + call bml_read_matrix(rho_bml,'density.mtx') + call bml_read_matrix(rho_ortho_bml,'density_ortho.mtx') - call prg_timer_start(deortho_timer) - call prg_deorthogonalize(rho_ortho_bml,zmat_bml,aux_bml,threshold,bml_type,verbose) - call prg_timer_stop(deortho_timer) + call prg_timer_start(deortho_timer) + call prg_deorthogonalize(rho_ortho_bml,zmat_bml,aux_bml,threshold,bml_type,verbose) + call prg_timer_stop(deortho_timer) - call bml_add_deprecated(-1.0_dp,aux_bml,1.0_dp,rho_bml,0.0_dp) - error_calc = bml_fnorm(aux_bml) + call bml_add_deprecated(-1.0_dp,aux_bml,1.0_dp,rho_bml,0.0_dp) + error_calc = bml_fnorm(aux_bml) - call bml_deallocate(nonortho_ham_bml) - call bml_deallocate(zmat_bml) - call bml_deallocate(aux_bml) + call bml_deallocate(nonortho_ham_bml) + call bml_deallocate(zmat_bml) + call bml_deallocate(aux_bml) - write(*,*)"prg_orthogonalize error ", error_calc + write(*,*)"prg_orthogonalize error ", error_calc - if(error_calc.gt.error_tol)then - write(*,*) "Error is too high", error_calc - error stop - endif + if(error_calc.gt.error_tol)then + write(*,*) "Error is too high", error_calc + error stop + endif - call prg_timer_stop(loop_timer) + call prg_timer_stop(loop_timer) case("prg_orthogonalize_dense") ! Orthogonalization of the Hamiltonian - call prg_timer_start(loop_timer) + call prg_timer_start(loop_timer) - error_tol = 1.0d-9 - bml_type = "dense" + error_tol = 1.0d-9 + bml_type = "dense" - call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,rho_bml) - call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,ham_bml) + call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,rho_bml) + call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,ham_bml) - call bml_read_matrix(ham_bml,'hamiltonian_ortho.mtx') + call bml_read_matrix(ham_bml,'hamiltonian_ortho.mtx') - call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,rho_bml) - call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,zmat_bml) - call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,nonortho_ham_bml) - call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,aux_bml) + call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,rho_bml) + call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,zmat_bml) + call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,nonortho_ham_bml) + call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,aux_bml) - call bml_read_matrix(zmat_bml,'zmatrix.mtx') - call bml_read_matrix(nonortho_ham_bml,'hamiltonian.mtx') + call bml_read_matrix(zmat_bml,'zmatrix.mtx') + call bml_read_matrix(nonortho_ham_bml,'hamiltonian.mtx') - call prg_timer_start(ortho_timer) - call prg_orthogonalize(nonortho_ham_bml,zmat_bml,aux_bml,threshold,bml_type,verbose) - call prg_timer_stop(ortho_timer) + call prg_timer_start(ortho_timer) + call prg_orthogonalize(nonortho_ham_bml,zmat_bml,aux_bml,threshold,bml_type,verbose) + call prg_timer_stop(ortho_timer) - call bml_add_deprecated(-1.0_dp,aux_bml,1.0_dp,ham_bml,0.0_dp) - error_calc = bml_fnorm(aux_bml) + call bml_add_deprecated(-1.0_dp,aux_bml,1.0_dp,ham_bml,0.0_dp) + error_calc = bml_fnorm(aux_bml) - call bml_deallocate(nonortho_ham_bml) - call bml_deallocate(zmat_bml) - call bml_deallocate(aux_bml) + call bml_deallocate(nonortho_ham_bml) + call bml_deallocate(zmat_bml) + call bml_deallocate(aux_bml) - write(*,*)"Orthogonalize error ", error_calc + write(*,*)"Orthogonalize error ", error_calc - if(error_calc.gt.error_tol)then - write(*,*) "Error is too high", error_calc - error stop - endif + if(error_calc.gt.error_tol)then + write(*,*) "Error is too high", error_calc + error stop + endif - call prg_timer_stop(ortho_timer) + call prg_timer_stop(ortho_timer) - case("prg_pulaycomponent0") + case("prg_pulaycomponent0") - call prg_timer_start(loop_timer) + call prg_timer_start(loop_timer) - error_tol = 1.0d-9 - bml_type = "dense" + error_tol = 1.0d-9 + bml_type = "dense" - call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,zmat_bml) - call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,ham_bml) - call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,rho_bml) - call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,pcm_bml) - call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,pcm_ref_bml) + call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,zmat_bml) + call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,ham_bml) + call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,rho_bml) + call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,pcm_bml) + call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,pcm_ref_bml) - call bml_read_matrix(zmat_bml,"zmatrix.mtx") - call bml_read_matrix(ham_bml,"hamiltonian.mtx") - call bml_read_matrix(rho_bml,"density.mtx") - call bml_read_matrix(pcm_ref_bml,"pcm.mtx") + call bml_read_matrix(zmat_bml,"zmatrix.mtx") + call bml_read_matrix(ham_bml,"hamiltonian.mtx") + call bml_read_matrix(rho_bml,"density.mtx") + call bml_read_matrix(pcm_ref_bml,"pcm.mtx") - call prg_timer_start(zdiag_timer) - call prg_PulayComponent0(rho_bml,ham_bml,pcm_bml,threshold,mdim,& - &bml_type,verbose) - call prg_timer_stop(zdiag_timer) + call prg_timer_start(zdiag_timer) + call prg_PulayComponent0(rho_bml,ham_bml,pcm_bml,threshold,mdim,& + &bml_type,verbose) + call prg_timer_stop(zdiag_timer) - write(*,*)"Pulay Component error ", error_calc + write(*,*)"Pulay Component error ", error_calc - call bml_add_deprecated(-1.0_dp,pcm_bml,1.0_dp,pcm_ref_bml,0.0_dp) - error_calc = bml_fnorm(pcm_bml) + call bml_add_deprecated(-1.0_dp,pcm_bml,1.0_dp,pcm_ref_bml,0.0_dp) + error_calc = bml_fnorm(pcm_bml) - if(error_calc.gt.error_tol)then - write(*,*) "Error is too high", error_calc - error stop - endif + if(error_calc.gt.error_tol)then + write(*,*) "Error is too high", error_calc + error stop + endif - call prg_timer_stop(loop_timer) + call prg_timer_stop(loop_timer) case("prg_buildzdiag") ! Building inverse overlap factor matrix (Lowdin method) - call prg_timer_start(loop_timer) + call prg_timer_start(loop_timer) - write(*,*) "Testing buildzdiag from prg_genz_mod" - error_tol = 1.0d-9 - bml_type = "dense" + write(*,*) "Testing buildzdiag from prg_genz_mod" + error_tol = 1.0d-9 + bml_type = "dense" - call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,zmat_bml) - call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,over_bml) + call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,zmat_bml) + call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,over_bml) - call bml_read_matrix(zmat_bml,'zmatrix.mtx') - call bml_read_matrix(over_bml,'overlap.mtx') + call bml_read_matrix(zmat_bml,'zmatrix.mtx') + call bml_read_matrix(over_bml,'overlap.mtx') - call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,aux_bml) - ! - call prg_timer_start(zdiag_timer) - call prg_buildzdiag(over_bml,aux_bml,threshold,norb,bml_type) - call prg_timer_stop(zdiag_timer) + call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,aux_bml) + ! + call prg_timer_start(zdiag_timer) + call prg_buildzdiag(over_bml,aux_bml,threshold,norb,bml_type) + call prg_timer_stop(zdiag_timer) - call bml_add_deprecated(-1.0_dp,aux_bml,1.0_dp,zmat_bml,0.0_dp) + call bml_add_deprecated(-1.0_dp,aux_bml,1.0_dp,zmat_bml,0.0_dp) - error_calc = bml_fnorm(aux_bml) + error_calc = bml_fnorm(aux_bml) - if(error_calc.gt.error_tol)then - write(*,*) "Error is too high", error_calc - error stop - endif + if(error_calc.gt.error_tol)then + write(*,*) "Error is too high", error_calc + error stop + endif - call prg_timer_stop(loop_timer) + call prg_timer_stop(loop_timer) case("prg_buildzsparse") ! Building inverse overlap factor matrix (Lowdin method) - write(*,*) "Testing buildzsparse from prg_genz_mod" - error_tol = 1.0d-7 - bml_type = "ellpack" + write(*,*) "Testing buildzsparse from prg_genz_mod" + error_tol = 1.0d-7 + bml_type = "ellpack" - call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,zmat_bml) - call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,zk1_bml) - call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,zk2_bml) - call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,zk3_bml) - call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,zk4_bml) - call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,zk5_bml) - call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,zk6_bml) - call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,over_bml) + call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,zmat_bml) + call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,zk1_bml) + call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,zk2_bml) + call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,zk3_bml) + call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,zk4_bml) + call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,zk5_bml) + call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,zk6_bml) + call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,over_bml) - call bml_read_matrix(zmat_bml,'zmatrix.mtx') - call bml_read_matrix(over_bml,'overlap.mtx') + call bml_read_matrix(zmat_bml,'zmatrix.mtx') + call bml_read_matrix(over_bml,'overlap.mtx') - call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,aux_bml) - ! - call prg_timer_start(zdiag_timer) - call prg_buildZsparse(over_bml,aux_bml,1,mdim,bml_type,zk1_bml,zk2_bml,zk3_bml& - &,zk4_bml,zk5_bml,zk6_bml,4,4,3,threshold,threshold,.true.,1) + call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,aux_bml) + ! + call prg_timer_start(zdiag_timer) + call prg_buildZsparse(over_bml,aux_bml,1,mdim,bml_type,zk1_bml,zk2_bml,zk3_bml& + &,zk4_bml,zk5_bml,zk6_bml,4,4,3,threshold,threshold,.true.,1) - call prg_buildzdiag(over_bml,aux_bml,threshold,norb,bml_type) - call prg_timer_stop(zdiag_timer) + call prg_buildzdiag(over_bml,aux_bml,threshold,norb,bml_type) + call prg_timer_stop(zdiag_timer) - call bml_add_deprecated(-1.0_dp,aux_bml,1.0_dp,zmat_bml,0.0_dp) + call bml_add_deprecated(-1.0_dp,aux_bml,1.0_dp,zmat_bml,0.0_dp) - error_calc = bml_fnorm(aux_bml) + error_calc = bml_fnorm(aux_bml) - if(error_calc.gt.error_tol)then - write(*,*) "Error is too high", error_calc - error stop - endif + if(error_calc.gt.error_tol)then + write(*,*) "Error is too high", error_calc + error stop + endif - call prg_timer_stop(loop_timer) + call prg_timer_stop(loop_timer) case("prg_system_parse_write_xyz") - call prg_parse_system(mol,"coords_100","xyz") - call prg_write_system(mol, "mysystem","xyz") - call system("diff -qs --ignore-all-space mysystem.xyz coords_100.xyz > tmp.tmp") - open(1,file="tmp.tmp") - read(1,*)dummy(1),dummy(2),dummy(3),dummy(4),dummy(5) - if(trim(dummy(5)).eq."differ")then - write(*,*) "Error coords are not the same" - error stop - endif + call prg_parse_system(mol,"coords_100","xyz") + call prg_write_system(mol, "mysystem","xyz") + call system("diff -qs --ignore-all-space mysystem.xyz coords_100.xyz > tmp.tmp") + open(1,file="tmp.tmp") + read(1,*)dummy(1),dummy(2),dummy(3),dummy(4),dummy(5) + if(trim(dummy(5)).eq."differ")then + write(*,*) "Error coords are not the same" + error stop + endif case("prg_system_parse_write_pdb") - call prg_parse_system(mol,"protein","pdb") - call prg_write_system(mol, "mysystem","pdb") - call system("diff -qs --ignore-all-space mysystem.pdb protein.pdb > tmp.tmp") - open(1,file="tmp.tmp") - read(1,*)dummy(1),dummy(2),dummy(3),dummy(4),dummy(5) - if(trim(dummy(5)).eq."differ")then - write(*,*) "Error coords are not the same" - error stop - endif + call prg_parse_system(mol,"protein","pdb") + call prg_write_system(mol, "mysystem","pdb") + call system("diff -qs --ignore-all-space mysystem.pdb protein.pdb > tmp.tmp") + open(1,file="tmp.tmp") + read(1,*)dummy(1),dummy(2),dummy(3),dummy(4),dummy(5) + if(trim(dummy(5)).eq."differ")then + write(*,*) "Error coords are not the same" + error stop + endif case("prg_system_parse_write_dat") - call prg_parse_system(mol,"inputblock","dat") - call prg_write_system(mol, "mysystem","dat") - call system("diff -qs --ignore-all-space mysystem.dat inputblock.dat > tmp.tmp") - open(1,file="tmp.tmp") - read(1,*)dummy(1),dummy(2),dummy(3),dummy(4),dummy(5) - if(trim(dummy(5)).eq."differ")then - write(*,*) "Error coords are not the same" - error stop - endif + call prg_parse_system(mol,"inputblock","dat") + call prg_write_system(mol, "mysystem","dat") + call system("diff -qs --ignore-all-space mysystem.dat inputblock.dat > tmp.tmp") + open(1,file="tmp.tmp") + read(1,*)dummy(1),dummy(2),dummy(3),dummy(4),dummy(5) + if(trim(dummy(5)).eq."differ")then + write(*,*) "Error coords are not the same" + error stop + endif case("prg_twolevel_model") - error_tol = 1.0D-10 - call prg_parse_mham(mham,"input-twolevel.in") - call bml_zero_matrix(mham%bml_type,bml_element_real,dp,mham%norbs,mham%norbs,aux_bml) - call bml_zero_matrix(mham%bml_type,bml_element_real,dp,mham%norbs,mham%norbs,ham_bml) - call prg_twolevel_model(mham%ea, mham%eb, mham%dab, mham%daiaj, mham%dbibj, & - &mham%dec, mham%rcoeff, mham%reshuffle, mham%seed, ham_bml, verbose) - call bml_read_matrix(aux_bml,'hamiltonian-twolevel-ref.mtx') - call bml_add_deprecated(-1.0_dp,aux_bml,1.0_dp,ham_bml,0.0_dp) - error_calc = bml_fnorm(aux_bml) - - write(*,*)"prg_twolevel_model error ", error_calc - - if(error_calc.gt.error_tol)then - write(*,*) "Error is too high", error_calc - error stop - endif - - - - !--------------------------------------------- - !LATTE routines - !--------------------------------------------- + error_tol = 1.0D-10 + call prg_parse_mham(mham,"input-twolevel.in") + call bml_zero_matrix(mham%bml_type,bml_element_real,dp,mham%norbs,mham%norbs,aux_bml) + call bml_zero_matrix(mham%bml_type,bml_element_real,dp,mham%norbs,mham%norbs,ham_bml) + call prg_twolevel_model(mham%ea, mham%eb, mham%dab, mham%daiaj, mham%dbibj, & + &mham%dec, mham%rcoeff, mham%reshuffle, mham%seed, ham_bml, verbose) + call bml_read_matrix(aux_bml,'hamiltonian-twolevel-ref.mtx') + call bml_add_deprecated(-1.0_dp,aux_bml,1.0_dp,ham_bml,0.0_dp) + error_calc = bml_fnorm(aux_bml) + + write(*,*)"prg_twolevel_model error ", error_calc + + if(error_calc.gt.error_tol)then + write(*,*) "Error is too high", error_calc + error stop + endif + + + + !--------------------------------------------- + !LATTE routines + !--------------------------------------------- case("load_tbparms_latte") - call prg_parse_system(mol,"protein","pdb") - !> Loading the tb parameters (electrons.dat) - call load_latteTBparams(tbparams,mol%splist,"./") - call write_latteTBparams(tbparams,"myelectrons.dat") - call system("diff --report-identical-files --ignore-all-space myelectrons.dat electrons.dat > tmp.tmp") - open(1,file="tmp.tmp") - read(1,*)dummy(1),dummy(2),dummy(3),dummy(4),dummy(5) - if(trim(dummy(5)).eq."differ")then - write(*,*) "Error tbparams are not the same" - error stop - endif + call prg_parse_system(mol,"protein","pdb") + !> Loading the tb parameters (electrons.dat) + call load_latteTBparams(tbparams,mol%splist,"./") + call write_latteTBparams(tbparams,"myelectrons.dat") + call system("diff --report-identical-files --ignore-all-space myelectrons.dat electrons.dat > tmp.tmp") + open(1,file="tmp.tmp") + read(1,*)dummy(1),dummy(2),dummy(3),dummy(4),dummy(5) + if(trim(dummy(5)).eq."differ")then + write(*,*) "Error tbparams are not the same" + error stop + endif case("load_bintTBparamsH") - call prg_parse_system(mol,"protein","pdb") - !> Loading the bint parameters (bondints.nonorth) - call load_latteTBparams(tbparams,mol%splist,"./") - call load_bintTBparamsH(mol%splist,tbparams%onsite_energ,& - typeA,typeB,intKind,onsitesH,onsitesS,intPairsH,intPairsS,"./") - call write_bintTBparamsH(typeA,typeB,& - intKind,intPairsH,intPairsS,"mybondints.nonorth") - call system("diff --report-identical-files --ignore-all-space mybondints.nonorth bondints.nonorth > tmp.tmp") - open(1,file="tmp.tmp") - read(1,*)dummy(1),dummy(2),dummy(3),dummy(4),dummy(5) - if(trim(dummy(5)).eq."differ")then - write(*,*) "Error bond int tbparams are not the same" - error stop - endif + call prg_parse_system(mol,"protein","pdb") + !> Loading the bint parameters (bondints.nonorth) + call load_latteTBparams(tbparams,mol%splist,"./") + call load_bintTBparamsH(mol%splist,tbparams%onsite_energ,& + typeA,typeB,intKind,onsitesH,onsitesS,intPairsH,intPairsS,"./") + call write_bintTBparamsH(typeA,typeB,& + intKind,intPairsH,intPairsS,"mybondints.nonorth") + call system("diff --report-identical-files --ignore-all-space mybondints.nonorth bondints.nonorth > tmp.tmp") + open(1,file="tmp.tmp") + read(1,*)dummy(1),dummy(2),dummy(3),dummy(4),dummy(5) + if(trim(dummy(5)).eq."differ")then + write(*,*) "Error bond int tbparams are not the same" + error stop + endif case default - write(*,*)"ERROR: unknown test ",test - error stop + write(*,*)"ERROR: unknown test ",test + error stop end select From 2ac9f501595d7963809ed1e25eee6cddfa34aed9 Mon Sep 17 00:00:00 2001 From: Linnea Andersson Date: Fri, 16 Jul 2021 17:51:18 +0200 Subject: [PATCH 03/10] Added kernel routine --- src/CMakeLists.txt | 1 + src/prg_ewald_mod.F90 | 242 +++++++++++++++++++++++++++++++-- src/prg_implicit_fermi_mod.F90 | 125 +++++++++++------ src/prg_xlbokernel_mod.F90 | 73 ++++++---- 4 files changed, 363 insertions(+), 78 deletions(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index becd6040..7d5a6641 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -96,6 +96,7 @@ install(FILES ${CMAKE_CURRENT_BINARY_DIR}/prg_chebyshev_mod.mod ${CMAKE_CURRENT_BINARY_DIR}/prg_densitymatrix_mod.mod ${CMAKE_CURRENT_BINARY_DIR}/prg_dos_mod.mod + ${CMAKE_CURRENT_BINARY_DIR}/prg_ewald_mod.mod ${CMAKE_CURRENT_BINARY_DIR}/prg_extras_mod.mod ${CMAKE_CURRENT_BINARY_DIR}/prg_genz_mod.mod ${CMAKE_CURRENT_BINARY_DIR}/prg_graph_mod.mod diff --git a/src/prg_ewald_mod.F90 b/src/prg_ewald_mod.F90 index 95127797..5539c12d 100644 --- a/src/prg_ewald_mod.F90 +++ b/src/prg_ewald_mod.F90 @@ -15,8 +15,10 @@ module prg_ewald_mod public :: Ewald_Real_Space_Single_latte public :: Ewald_Real_Space public :: Ewald_Real_Space_latte - public :: Ewald_k_space + public :: Ewald_Real_Space_Test + public :: Ewald_k_space_latte_single public :: Ewald_k_space_latte + public :: Ewald_k_space_Test contains @@ -374,6 +376,123 @@ subroutine Ewald_Real_Space_latte(COULOMBV,I,RXYZ,Box, & end subroutine Ewald_Real_Space_latte + subroutine Ewald_Real_Space_Test(COULOMBV,I,RX,RY,RZ,LBox, & + DELTAQ,U,Element_Type,Nr_atoms,COULACC,nnRx,nnRy,nnRz,nrnnlist,nnType,Max_Nr_Neigh) + + implicit none + + integer, parameter :: PREC = 8 + real(PREC), parameter :: ONE = 1.D0, TWO = 2.D0, ZERO = 0.D0, SIX = 6.D0, THREE = 3.D0 + real(PREC), parameter :: FOURTYEIGHT = 48.D0, ELEVEN = 11.D0, SIXTEEN = 16.D0 + real(PREC), parameter :: pi = 3.14159265358979323846264D0 + real(PREC), parameter :: SQRTPI = 1.772453850905516D0 + integer, intent(in) :: Nr_atoms, Max_Nr_Neigh, I + real(PREC), intent(in) :: COULACC + real(PREC) :: TFACT, RELPERM, KECONST + real(PREC), intent(in) :: RX(Nr_atoms), RY(Nr_atoms), RZ(Nr_atoms), LBox(3), DELTAQ(Nr_atoms) + real(PREC), intent(in) :: U(Nr_atoms) + real(PREC) :: COULCUT, COULCUT2 + character(10), intent(in) :: Element_Type(Nr_atoms) + integer, intent(in) :: nrnnlist(Nr_atoms), nnType(Nr_atoms,Max_Nr_Neigh) + real(PREC), intent(in) :: nnRx(Nr_atoms,Max_Nr_Neigh) + real(PREC), intent(in) :: nnRy(Nr_atoms,Max_Nr_Neigh), nnRz(Nr_atoms,Max_Nr_Neigh) + real(PREC), intent(out) :: COULOMBV + real(PREC) :: Ra(3), Rb(3), dR, Rab(3) + real(PREC) :: COULVOL, SQRTX, CALPHA, DC(3), MAGR, MAGR2, Z + real(PREC) :: CALPHA2, TI,TI2,TI3,TI4,TI6,SSA,SSB,SSC,SSD,SSE + real(PREC) :: NUMREP_ERFC, CA, FORCE, EXPTI, EXPTJ + real(PREC) :: TJ,TJ2,TJ3,TJ4,TJ6,TI2MTJ2A,SA,SB,SC,SD,SE,SF + real(PREC) :: TI2MTJ2, TI2MTI2, TJ2MTI2 + integer :: J,K, ccnt, nnI + + COULVOL = LBox(1)*LBox(2)*LBox(3) + SQRTX = sqrt(-log(COULACC)) + + ccnt = 0 + COULCUT = 12.D0 + CALPHA = SQRTX/COULCUT + COULCUT2 = COULCUT*COULCUT + CALPHA2 = CALPHA*CALPHA + + RELPERM = ONE + KECONST = 14.3996437701414D0*RELPERM + TFACT = 16.0D0/(5.0D0*KECONST) + + COULOMBV = ZERO + + TI = TFACT*U(I) + TI2 = TI*TI + TI3 = TI2*TI + TI4 = TI2*TI2 + TI6 = TI4*TI2 + + SSA = TI + SSB = TI3/48.D0 + SSC = 3.D0*TI2/16.D0 + SSD = 11.D0*TI/16.D0 + SSE = 1.D0 + + Ra(1) = RX(I) + Ra(2) = RY(I) + Ra(3) = RZ(I) + +! !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(nnI,J,Rb,Rab,dR,MAGR,MAGR2,TJ,DC,Z,NUMREP_ERFC,CA) & +! !$OMP REDUCTION(+:COULOMBV) + do nnI = 1,nrnnlist(I) + Rb(1) = nnRx(I,nnI) + Rb(2) = nnRy(I,nnI) + Rb(3) = nnRz(I,nnI) + J = nnType(I,nnI) + Rab = Rb-Ra ! OBS b - a !!! + dR = norm2(Rab) + MAGR = dR + MAGR2 = dR*dR + + if ((dR <= COULCUT).and.(dR > 1e-12)) then + + TJ = TFACT*U(J) + DC = Rab/dR + + ! Not Using Numerical Recipes ERFC + Z = abs(CALPHA*MAGR) + NUMREP_ERFC = erfc(Z) + + CA = NUMREP_ERFC/MAGR + COULOMBV = COULOMBV + DELTAQ(J)*CA + ccnt = ccnt + 1 + !TEST(ccnt) = DELTAQ(J)*CA + CA = CA + TWO*CALPHA*exp( -CALPHA2*MAGR2 )/SQRTPI + EXPTI = exp(-TI*MAGR ) + + if (Element_Type(I).eq.Element_Type(J)) then + COULOMBV = COULOMBV - DELTAQ(J)*EXPTI*(SSB*MAGR2 + SSC*MAGR + SSD + SSE/MAGR) + ccnt = ccnt + 1 + !TEST(ccnt) = - DELTAQ(J)*EXPTI*(SSB*MAGR2 + SSC*MAGR + SSD + SSE/MAGR) + else + TJ2 = TJ*TJ + TJ3 = TJ2*TJ + TJ4 = TJ2*TJ2 + TJ6 = TJ4*TJ2 + EXPTJ = exp( -TJ*MAGR ) + TI2MTJ2 = TI2 - TJ2 + TJ2MTI2 = -TI2MTJ2 + SA = TI + SB = TJ4*TI/(TWO*TI2MTJ2*TI2MTJ2) + SC = (TJ6 - THREE*TJ4*TI2)/(TI2MTJ2*TI2MTJ2*TI2MTJ2) + SD = TJ + SE = TI4*TJ/(TWO * TJ2MTI2 * TJ2MTI2) + SF = (TI6 - THREE*TI4*TJ2)/(TJ2MTI2*TJ2MTI2*TJ2MTI2) + + COULOMBV = COULOMBV - (DELTAQ(J)*(EXPTI*(SB - (SC/MAGR)) + EXPTJ*(SE - (SF/MAGR)))) + endif + + endif + enddo +! !$OMP END PARALLEL DO + COULOMBV = KECONST*COULOMBV + + end subroutine Ewald_Real_Space_Test + subroutine Ewald_Real_Space(COULOMBV,FCOUL,I,RX,RY,RZ,LBox, & DELTAQ,U,Element_Type,Nr_atoms,COULACC,TIMERATIO,nnRx,nnRy,nnRz,nrnnlist,nnType,HDIM,Max_Nr_Neigh) @@ -623,21 +742,119 @@ subroutine Ewald_k_Space_latte(COULOMBV,RXYZ,Box,DELTAQ,Nr_atoms,COULACC,Max_Nr_ end subroutine Ewald_k_Space_latte - subroutine Ewald_k_Space(COULOMBV,FCOUL,RX,RY,RZ,LBox,DELTAQ,Nr_atoms,COULACC,TIMERATIO,Max_Nr_Neigh) + subroutine Ewald_k_Space_latte_single(COULOMBV,J,RXYZ,Box,DELTAQ,Nr_atoms,COULACC) implicit none + integer, parameter :: PREC = 8 + real(PREC), parameter :: ONE = 1.D0, TWO = 2.D0, ZERO = 0.D0, SIX = 6.D0, THREE = 3.D0, FOUR = 4.D0 + real(PREC), parameter :: FOURTYEIGHT = 48.D0, ELEVEN = 11.D0, SIXTEEN = 16.D0, EIGHT = 8.D0 + real(PREC), parameter :: pi = 3.14159265358979323846264D0 + real(PREC), parameter :: SQRTPI = 1.772453850905516D0 + integer, intent(in) :: Nr_atoms, J + real(PREC), intent(in) :: COULACC + real(PREC) :: KECONST, TFACT, RELPERM + real(PREC), intent(in) :: RXYZ(3,Nr_atoms), Box(3,3), DELTAQ(Nr_atoms) + real(PREC) :: COULCUT, COULCUT2 + real(PREC), intent(out) :: COULOMBV(Nr_atoms) + real(PREC) :: Ra(3), Rb(3), dR, Rab(3) + real(PREC) :: COULVOL, SQRTX, CALPHA, DC(3), MAGR, MAGR2, Z + real(PREC) :: CALPHA2 + real(PREC) :: CORRFACT,FOURCALPHA2 + real(PREC) :: RECIPVECS(3,3) + real(PREC) :: K(3),L11,L12,L13,M21,M22,M23,K2,KCUTOFF,KCUTOFF2,PREFACTOR + real(PREC) :: IDOT, JDOT, COSJDOT, SINJDOT, KEPREF + + integer :: I,L,M,N, ccnt, nnI, LMAX,MMAX,NMAX,NMIN,MMIN + + COULVOL = Box(1,1)*Box(2,2)*Box(3,3) + SQRTX = sqrt(-log(COULACC)) + + ccnt = 0 + + COULCUT = 12.0D0 + CALPHA = SQRTX/COULCUT + + COULCUT2 = COULCUT*COULCUT + KCUTOFF = TWO*CALPHA*SQRTX + KCUTOFF2 = KCUTOFF*KCUTOFF + CALPHA2 = CALPHA*CALPHA + FOURCALPHA2 = FOUR*CALPHA2 + + RECIPVECS = ZERO + RECIPVECS(1,1) = TWO*pi/Box(1,1) + RECIPVECS(2,2) = TWO*pi/Box(2,2) + RECIPVECS(3,3) = TWO*pi/Box(3,3) + LMAX = floor(KCUTOFF / sqrt(RECIPVECS(1,1)*RECIPVECS(1,1))) + MMAX = floor(KCUTOFF / sqrt(RECIPVECS(2,2)*RECIPVECS(2,2))) + NMAX = floor(KCUTOFF / sqrt(RECIPVECS(3,3)*RECIPVECS(3,3))) + + RELPERM = 1.D0 + KECONST = 14.3996437701414D0*RELPERM + + COULOMBV = ZERO + + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(I,IDOT,JDOT,COSJDOT,SINJDOT,L,M,N,MMIN,MMAX,NMIN,NMAX,L11,M22,K,K2,PREFACTOR,KEPREF) + do L = 0,LMAX + + if (L.eq.0) then + MMIN = 0 + else + MMIN = -MMAX + endif + + L11 = L*RECIPVECS(1,1) + + do M = MMIN,MMAX + + NMIN = -NMAX + if ((L==0).and.(M==0)) then + NMIN = 1 + endif + + M22 = M*RECIPVECS(2,2) + + do N = NMIN,NMAX + K(1) = L11 + K(2) = M22 + K(3) = N*RECIPVECS(3,3) + K2 = K(1)*K(1) + K(2)*K(2) + K(3)*K(3) + + PREFACTOR = EIGHT*pi*exp(-K2/(4.D0*CALPHA2))/(COULVOL*K2) + KEPREF = KECONST*PREFACTOR + JDOT = K(1)*RXYZ(1,J) + K(2)*RXYZ(2,J) + K(3)*RXYZ(3,J) + SINJDOT = sin(JDOT) + COSJDOT = cos(JDOT) + do I = 1,Nr_atoms + IDOT = K(1)*RXYZ(1,I) + K(2)*RXYZ(2,I) + K(3)*RXYZ(3,I) + COULOMBV(I) = COULOMBV(I) + KEPREF*DELTAQ(J)*(COSJDOT*cos(IDOT)+SINJDOT*sin(IDOT)) + enddo + enddo + enddo + enddo + !$OMP END PARALLEL DO + + ! Point self energy + CORRFACT = 2.D0*KECONST*CALPHA/SQRTPI; + COULOMBV = COULOMBV - CORRFACT*DELTAQ; + + end subroutine Ewald_k_Space_latte_single + + subroutine Ewald_k_Space_Test(COULOMBV,RX,RY,RZ,LBox,DELTAQ,Nr_atoms,COULACC,Max_Nr_Neigh) +! + implicit none +! integer, parameter :: PREC = 8 real(PREC), parameter :: ONE = 1.D0, TWO = 2.D0, ZERO = 0.D0, SIX = 6.D0, THREE = 3.D0, FOUR = 4.D0 real(PREC), parameter :: FOURTYEIGHT = 48.D0, ELEVEN = 11.D0, SIXTEEN = 16.D0, EIGHT = 8.D0 real(PREC), parameter :: pi = 3.14159265358979323846264D0 real(PREC), parameter :: SQRTPI = 1.772453850905516D0 integer, intent(in) :: Nr_atoms, Max_Nr_Neigh - real(PREC), intent(in) :: COULACC, TIMERATIO + real(PREC), intent(in) :: COULACC real(PREC) :: KECONST, TFACT, RELPERM real(PREC), intent(in) :: RX(Nr_atoms), RY(Nr_atoms), RZ(Nr_atoms), LBox(3), DELTAQ(Nr_atoms) real(PREC) :: COULCUT, COULCUT2 - real(PREC), intent(out) :: COULOMBV(Nr_atoms), FCOUL(3,Nr_atoms) + real(PREC), intent(out) :: COULOMBV(Nr_atoms) real(PREC) :: Ra(3), Rb(3), dR, Rab(3) real(PREC) :: COULVOL, SQRTX, CALPHA, DC(3), MAGR, MAGR2, Z real(PREC) :: CALPHA2, TI,TI2,TI3,TI4,TI6,SSA,SSB,SSC,SSD,SSE @@ -647,7 +864,7 @@ subroutine Ewald_k_Space(COULOMBV,FCOUL,RX,RY,RZ,LBox,DELTAQ,Nr_atoms,COULACC,TI real(PREC) :: PREVIR, COSSUM,SINSUM,DOT, KEPREF, COSSUM2, SINSUM2 integer :: I,J,L,M,N, ccnt, nnI, LMAX,MMAX,NMAX,NMIN,MMIN - +! COULVOL = LBox(1)*LBox(2)*LBox(3) SQRTX = sqrt(-log(COULACC)) @@ -673,7 +890,6 @@ subroutine Ewald_k_Space(COULOMBV,FCOUL,RX,RY,RZ,LBox,DELTAQ,Nr_atoms,COULACC,TI RELPERM = 1.D0 KECONST = 14.3996437701414D0*RELPERM - FCOUL = ZERO COULOMBV = ZERO SINLIST = ZERO COSLIST = ZERO @@ -714,6 +930,9 @@ subroutine Ewald_k_Space(COULOMBV,FCOUL,RX,RY,RZ,LBox,DELTAQ,Nr_atoms,COULACC,TI SINSUM = 0.D0 ! Doing the sin and cos sums + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(I,DOT) & + !$OMP REDUCTION(+:COSSUM) & + !$OMP REDUCTION(+:SINSUM) do I = 1,Nr_atoms DOT = K(1)*RX(I) + K(2)*RY(I) + K(3)*RZ(I) ! We re-use these in the next loop... @@ -722,19 +941,18 @@ subroutine Ewald_k_Space(COULOMBV,FCOUL,RX,RY,RZ,LBox,DELTAQ,Nr_atoms,COULACC,TI COSSUM = COSSUM + DELTAQ(I)*COSLIST(I) SINSUM = SINSUM + DELTAQ(I)*SINLIST(I) enddo + !$OMP END PARALLEL DO COSSUM2 = COSSUM*COSSUM SINSUM2 = SINSUM*SINSUM ! Add up energy and force contributions - +! KEPREF = KECONST*PREFACTOR + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(I) do I = 1,Nr_atoms COULOMBV(I) = COULOMBV(I) + KEPREF*(COSLIST(I)*COSSUM + SINLIST(I)*SINSUM) - FORCE = KEPREF*DELTAQ(I)*(SINLIST(I)*COSSUM - COSLIST(I)*SINSUM) - FCOUL(1,I) = FCOUL(1,I) + FORCE*K(1) - FCOUL(2,I) = FCOUL(2,I) + FORCE*K(2) - FCOUL(3,I) = FCOUL(3,I) + FORCE*K(3) enddo + !$OMP END PARALLEL DO KEPREF = KEPREF*(COSSUM2 + SINSUM2) endif @@ -746,6 +964,6 @@ subroutine Ewald_k_Space(COULOMBV,FCOUL,RX,RY,RZ,LBox,DELTAQ,Nr_atoms,COULACC,TI CORRFACT = 2.D0*KECONST*CALPHA/SQRTPI; COULOMBV = COULOMBV - CORRFACT*DELTAQ; - end subroutine Ewald_k_Space + end subroutine Ewald_k_Space_Test end module prg_ewald_mod diff --git a/src/prg_implicit_fermi_mod.F90 b/src/prg_implicit_fermi_mod.F90 index 76bd2bc2..37d03618 100644 --- a/src/prg_implicit_fermi_mod.F90 +++ b/src/prg_implicit_fermi_mod.F90 @@ -58,11 +58,11 @@ subroutine prg_implicit_fermi_save_inverse(Inv_bml, h_bml, p_bml, nsteps, nocc, integer, intent(inout) :: occiter type(bml_matrix_t) :: w_bml, y_bml, d_bml, aux_bml, p2_bml, I_bml, ai_bml - real(dp) :: trdPdmu, trP0, occErr, alpha - real(dp) :: cnst, ofactor, mustep + real(dp) :: trdPdmu, trP0, occErr, alpha, newerr + real(dp) :: cnst, ofactor, mustep, preverr real(dp), allocatable :: trace(:), gbnd(:) character(20) :: bml_type - integer :: N, M, i, iter, muadj, prev + integer :: N, M, i, iter, muadj, prev, maxiter bml_type = bml_get_type(h_bml) N = bml_get_N(h_bml) @@ -78,9 +78,12 @@ subroutine prg_implicit_fermi_save_inverse(Inv_bml, h_bml, p_bml, nsteps, nocc, call bml_identity_matrix(bml_type, bml_element_real, dp, N, M, ai_bml) occErr = 10.0_dp - alpha = 1.0_dp + newerr = 1000_dp + preverr = 1000_dp + alpha = 8.0_dp prev = 0 iter = 0 + maxiter = 30 cnst = beta/(1.0_dp*2**(nsteps+2)) if (SCF_IT .eq. 1) then @@ -95,12 +98,15 @@ subroutine prg_implicit_fermi_save_inverse(Inv_bml, h_bml, p_bml, nsteps, nocc, call bml_add(y_bml, p_bml, 1.0_dp, -1.0_dp, threshold) call bml_scale_add_identity(y_bml, 2.0_dp, 1.0_dp, threshold) call prg_conjgrad(y_bml, ai_bml, I_bml, aux_bml, d_bml, w_bml, 0.0001_dp, threshold) + do i = 1, nsteps + call bml_copy(I_bml, Inv_bml(i)) + enddo else ! Otherwise use previous inverse as starting guess call bml_copy(Inv_bml(1),ai_bml) end if - do while ((occErr .gt. occErrLimit .or. muadj .eq. 1) .and. iter < 50) + do while ((occErr .gt. occErrLimit .or. muadj .eq. 1) .and. iter < maxiter) iter = iter + 1 muadj = 0 write(*,*) 'mu =', mu @@ -116,46 +122,65 @@ subroutine prg_implicit_fermi_save_inverse(Inv_bml, h_bml, p_bml, nsteps, nocc, call bml_add(y_bml, p_bml, 1.0_dp, -1.0_dp, threshold) call bml_scale_add_identity(y_bml, 2.0_dp, 1.0_dp, threshold) ! Find inverse ai = (2*(P2-P)+I)^-1 - !call prg_conjgrad(y_bml, ai_bml, I_bml, aux_bml, d_bml, w_bml, 0.01_dp, threshold) - call prg_newtonschulz(y_bml, ai_bml, d_bml, w_bml, aux_bml, I_bml, tol, threshold) - call bml_multiply(ai_bml, p2_bml, p_bml, 1.0_dp, 0.0_dp, threshold) - call bml_copy(ai_bml, Inv_bml(i)) ! Save inverses for use in perturbation response calculation + !call prg_conjgrad(y_bml, Inv_bml(i), I_bml, aux_bml, d_bml, w_bml, tol, threshold) + !call bml_copy(Inv_bml(i),ai_bml) + call prg_newtonschulz(y_bml, Inv_bml(i), d_bml, w_bml, aux_bml, I_bml, tol, threshold) + call bml_multiply(Inv_bml(i), p2_bml, p_bml, 1.0_dp, 0.0_dp, threshold) + !call bml_copy(ai_bml, Inv_bml(i)) ! Save inverses for use in perturbation response calculation enddo - trdPdmu = bml_trace(p_bml) - trP0 = trdPdmu - trdPdmu = trdPdmu - bml_sum_squares(p_bml) ! sum p(i,j)**2 - trdPdmu = beta * trdPdmu + trP0 = bml_trace(p_bml) + trdPdmu = beta*(trP0 - bml_sum_squares(p_bml)) ! sum p(i,j)**2 occErr = abs(trP0 - nocc) - write(*,*) 'occerr =', nocc-trP0 - + write(*,*) 'occerr =', occErr + ! If occupation error is too large, do bisection method - if (occerr > 10.0_dp) then - if (nocc-trP0 < 0.0_dp) then - if (prev .eq. 1) then - alpha = alpha/2 - endif + if (occerr > 1.0_dp) then + ! if (newerr > occerr) then + if (nocc-trP0 < 0.0_dp .and. prev .eq. -1) then prev = -1 - mu = mu - alpha - else - if (prev .eq. -1) then - alpha = alpha/2 - endif + else if (nocc-trP0 > 0.0_dp .and. prev .eq. 1) then prev = 1 - mu = mu + alpha + else if (nocc-trP0 > 0.0_dp .and. prev .eq. -1) then + prev = 1 + alpha = alpha/2 + ! newerr = abs(trP0+prev*alpha*trdPdmu-nocc) + ! do while(newerr > occerr) + ! alpha = alpha/2 + ! newerr = abs(trP0+prev*alpha*trdPdmu-nocc) + ! enddo + else + prev = -1 + alpha = alpha/2 + ! newerr = abs(trP0+prev*alpha*trdPdmu-nocc) + ! do while(newerr > occerr) + ! alpha = alpha/2 + ! newerr = abs(trP0+prev*alpha*trdPdmu-nocc) + ! enddo endif + !newerr = abs(trP0+prev*alpha*trdPdmu-nocc) + !do while(newerr > occerr .and. abs(occerr-newerr)/occerr<0.1 ) + ! alpha = alpha/2 + ! newerr = abs(trP0+prev*alpha*trdPdmu-nocc) + !enddo +! write(*,*) 'newerr =', newerr + mu = mu + prev*alpha + muadj = 1 + ! Otherwise do Newton else if (occErr .gt. occErrLimit) then mustep = (nocc -trP0)/trdPdmu - if (abs(mustep) > 1.0) then - mustep = 0.1_dp*mustep - end if + !if (abs(mustep) > 1.0 .or. preverr < occErr) then + ! alpha = alpha/2 + ! mustep = alpha*mustep + !end if mu = mu + mustep muadj = 1 + preverr = occErr end if enddo - if (iter .ge. 50) then + if (iter .ge. maxiter) then write(*,*) 'Could not converge chemical potential in prg_impplicit_fermi_save_inverse' end if ! Adjusting the occupation sometimes causes the perturbation calculation to not converge. @@ -432,12 +457,12 @@ subroutine prg_implicit_fermi_first_order_response(H0_bml, H1_bml, P0_bml, P1_bm implicit none type(bml_matrix_t), intent(in) :: H0_bml, H1_bml, Inv_bml(nsteps) - type(bml_matrix_t), intent(inout) :: P0_bml, P1_bml + type(bml_matrix_t), intent(inout) :: P0_bml,P1_bml real(dp), intent(in) :: mu0, threshold real(dp) :: mu1 real(dp), intent(in) :: beta, nocc integer, intent(in) :: nsteps - type(bml_matrix_t) :: B_bml, C_bml, C0_bml + type(bml_matrix_t) :: B0_bml, B_bml, C_bml, C0_bml character(20) :: bml_type real(dp) :: p1_trace, dPdmu_trace, p1B_trace, mu1B, cnst integer :: N, M, i, j, k @@ -446,6 +471,7 @@ subroutine prg_implicit_fermi_first_order_response(H0_bml, H1_bml, P0_bml, P1_bm N = bml_get_N(H0_bml) M = bml_get_M(H0_bml) + call bml_zero_matrix(bml_type, bml_element_real, dp, N, M, B0_bml) call bml_zero_matrix(bml_type, bml_element_real, dp, N, M, B_bml) call bml_zero_matrix(bml_type, bml_element_real, dp, N, M, C_bml) call bml_zero_matrix(bml_type, bml_element_real, dp, N, M, C0_bml) @@ -477,11 +503,31 @@ subroutine prg_implicit_fermi_first_order_response(H0_bml, H1_bml, P0_bml, P1_bm call bml_multiply(Inv_bml(i), C_bml, P1_bml, 1.0_dp, 0.0_dp, threshold) enddo +! do i = 1, nsteps-1 + ! D = A^-1*P0 +! call bml_multiply(Inv_bml(i), B0_bml, C0_bml, 1.0_dp, 0.0_dp, threshold) +! call bml_multiply(C0_bml, B0_bml, B_bml, 1.0_dp, 0.0_dp, threshold) + ! B0 = A^-1*P0^2 +! call bml_copy(B_bml,B0_bml) + ! B = I + D -P0*D +! call bml_add(B_bml, C0_bml, -1.0_dp, 1.0_dp, threshold) +! call bml_scale_add_identity(B_bml, 1.0_dp, 1.0_dp, threshold) + ! P1 = 2D*P1(I+D-P0*D) +! call bml_multiply(C0_bml, P1_bml, C_bml, 1.0_dp, 0.0_dp, threshold) +! call bml_multiply(C_bml, B_bml, P1_bml, 2.0_dp, 0.0_dp, threshold) +! enddo +! call bml_multiply(B0_bml, P1_bml, C_bml, 2.0_dp, 0.0_dp, threshold) +! call bml_copy(P1_bml, B_bml) +! call bml_add(B_bml, C_bml, 2.0_dp, -2.0_dp, threshold) + ! Get next P1 +! call bml_multiply(B_bml, P0_bml, C_bml, 1.0_dp, 1.0_dp, threshold) +! call bml_multiply(Inv_bml(i), C_bml, P1_bml, 1.0_dp, 0.0_dp, threshold) + + ! dPdmu = beta*P0(I-P0) call bml_copy(P0_bml, B_bml) call bml_scale_add_identity(B_bml, -1.0_dp, 1.0_dp, threshold) - call bml_multiply(P0_bml, B_bml, C_bml, 1.0_dp, 0.0_dp, threshold) - call bml_scale(beta,C_bml) + call bml_multiply(P0_bml, B_bml, C_bml, beta, 0.0_dp, threshold) dPdmu_trace = bml_trace(C_bml) p1_trace = bml_trace(P1_bml) mu1 = - p1_trace/dPdmu_trace @@ -490,6 +536,7 @@ subroutine prg_implicit_fermi_first_order_response(H0_bml, H1_bml, P0_bml, P1_bm endif call bml_deallocate(B_bml) + call bml_deallocate(B0_bml) call bml_deallocate(C_bml) call bml_deallocate(C0_bml) @@ -866,9 +913,7 @@ subroutine prg_newtonschulz(a_bml, ai_bml, r_bml, tmp_bml, d_bml, I_bml, tol, th N = bml_get_N(a_bml) err = 100000.0 i = 0 - N2 = N*N - scaled_tol = tol*N - do while(err > scaled_tol) + do while(err > tol) !write(*,*) 'iter = ', i !write(*,*) 'ns error =', err call bml_copy(ai_bml, tmp_bml) @@ -880,7 +925,7 @@ subroutine prg_newtonschulz(a_bml, ai_bml, r_bml, tmp_bml, d_bml, I_bml, tol, th !write(*,*) "prev_err = ", prev_err if (10*prev_err < err) then write(*,*) 'NS did not converge, calling conjugate gradient' - call prg_conjgrad(a_bml, ai_bml, I_bml, r_bml, tmp_bml, d_bml, 0.0001_dp, threshold) + call prg_conjgrad(a_bml, ai_bml, I_bml, r_bml, tmp_bml, d_bml, 0.00001_dp, threshold) else call bml_multiply(tmp_bml, r_bml, ai_bml, 1.0_dp, 1.0_dp, threshold) endif @@ -990,7 +1035,7 @@ subroutine prg_conjgrad(A_bml, p_bml, p2_bml, tmp_bml, d_bml, w_bml, cg_tol, thr do while (r_norm_new .gt. cg_tol) - write(*,*) r_norm_new + ! write(*,*) r_norm_new k = k + 1 if (k .eq. 1) then call bml_copy(tmp_bml, d_bml) @@ -1011,7 +1056,7 @@ subroutine prg_conjgrad(A_bml, p_bml, p2_bml, tmp_bml, d_bml, w_bml, cg_tol, thr stop endif enddo - write(*,*) "Number of CG-iterations:", k + !write(*,*) "Number of CG-iterations:", k end subroutine prg_conjgrad diff --git a/src/prg_xlbokernel_mod.F90 b/src/prg_xlbokernel_mod.F90 index a19cb5b2..81496915 100644 --- a/src/prg_xlbokernel_mod.F90 +++ b/src/prg_xlbokernel_mod.F90 @@ -83,23 +83,23 @@ end subroutine Invert !! \param Nr_elem Number of elements in Hubbard list. subroutine prg_kernel_multirank_latte(KRes,KK0_bml,Res,FelTol,L,LMAX,NUMRANK,HO_bml,mu,beta,RXYZ,Box,Hubbard_U,Element_Pointer, & Nr_atoms,HDIM,Max_Nr_Neigh,Coulomb_acc,nebcoul,totnebcoul,Hinxlist, & - S_bml,Z_bml,Nocc,Inv_bml,H1_bml,X_bml,Y_bml,DO_bml,D1_bml,m_rec,threshold,Nr_elem) + S_bml,Z_bml,Nocc,Inv_bml,H1_bml,DO_bml,D1_bml,m_rec,threshold,Nr_elem) !! Res = q[n] - n !! KK0 is preconditioner !! KRes rank-L approximation of (K0*J)^(-1)*K0*(q[n]-n) with (K0*J)^(-1) as in Eq. (41) in Ref. [*] implicit none - integer, intent(in) :: Nr_atoms, HDIM, Nocc,Max_Nr_Neigh,m_rec,Nr_elem,NUMRANK + integer, intent(in) :: Nr_atoms, HDIM, Max_Nr_Neigh,m_rec,Nr_elem,NUMRANK real(dp), parameter :: ONE = 1.D0, TWO = 2.D0, ZERO = 0.D0 - real(dp), intent(in) :: Coulomb_acc, FelTol, beta, mu + real(dp), intent(in) :: Coulomb_acc, FelTol, beta, mu, Nocc real(dp), intent(in) :: RXYZ(3,Nr_atoms), Box(3,3) real(dp), intent(in) :: Res(Nr_atoms) integer, intent(in) :: Hinxlist(HDIM),Element_Pointer(Nr_atoms) real(dp), intent(in) :: Hubbard_U(Nr_elem) type(bml_matrix_t), intent(inout) :: HO_bml, S_bml, Z_bml, Inv_bml(m_rec), KK0_bml real(dp) :: K0Res(Nr_atoms) - type(bml_matrix_t),intent(inout) :: H1_bml, X_bml,Y_bml,DO_bml,D1_bml + type(bml_matrix_t),intent(inout) :: H1_bml, DO_bml,D1_bml real(dp), intent(in) :: threshold integer, intent(in) :: LMAX character(20) :: bml_type @@ -115,17 +115,19 @@ subroutine prg_kernel_multirank_latte(KRes,KK0_bml,Res,FelTol,L,LMAX,NUMRANK,HO_ real(dp) :: vi(Nr_atoms,LMAX), fi(Nr_atoms,LMAX), v(Nr_atoms) real(dp) :: dr(Nr_Atoms), IdentRes(Nr_atoms) real(dp), allocatable :: O(:,:),M(:,:),row1(:),row2(:),row_NA(:) - type(bml_matrix_t) :: KK0T_bml, K0Res_bml, Res_bml, ZT_bml, T_bml + type(bml_matrix_t) :: KK0T_bml, K0Res_bml, Res_bml, ZT_bml, T_bml, X_bml, Y_bml call timer_prg_init() bml_type = bml_get_type(HO_bml) N = bml_get_N(HO_bml) MN = bml_get_M(HO_bml) allocate(row1(HDIM)); allocate(row2(HDIM)); allocate(row_NA(Nr_atoms)) - call bml_zero_matrix('csr',bml_element_real,dp,Nr_atoms,Nr_atoms,KK0T_bml) - call bml_zero_matrix('csr',bml_element_real,dp,Nr_atoms,Nr_atoms,K0Res_bml) - call bml_zero_matrix('csr',bml_element_real,dp,Nr_atoms,Nr_atoms,Res_bml) + call bml_zero_matrix(bml_type,bml_element_real,dp,Nr_atoms,Nr_atoms,KK0T_bml) + call bml_zero_matrix(bml_type,bml_element_real,dp,Nr_atoms,Nr_atoms,K0Res_bml) + call bml_zero_matrix(bml_type,bml_element_real,dp,Nr_atoms,Nr_atoms,Res_bml) call bml_zero_matrix(bml_type,bml_element_real,dp,N,MN,ZT_bml) + call bml_zero_matrix(bml_type,bml_element_real,dp,N,MN,X_bml) + call bml_zero_matrix(bml_type,bml_element_real,dp,N,MN,Y_bml) call bml_transpose(Z_bml,ZT_bml) @@ -137,6 +139,7 @@ subroutine prg_kernel_multirank_latte(KRes,KK0_bml,Res,FelTol,L,LMAX,NUMRANK,HO_ K0Res = row_NA dr = K0Res + write(*,*) 'resnorm =', norm2(Res), 'kresnorm =', norm2(dr) I = 0 Fel = 1.D0 do while ((Fel > FelTol).AND.(I < (LMAX)).AND.(I < NUMRANK)) !! Fel = "Error" in Swedish @@ -249,6 +252,8 @@ subroutine prg_kernel_multirank_latte(KRes,KK0_bml,Res,FelTol,L,LMAX,NUMRANK,HO_ call bml_deallocate(K0Res_bml) call bml_deallocate(Res_bml) call bml_deallocate(ZT_bml) + call bml_deallocate(X_bml) + call bml_deallocate(Y_bml) call prg_timer_shutdown() end subroutine prg_kernel_multirank_latte @@ -327,8 +332,8 @@ subroutine prg_kernel_multirank(KRes,KK0_bml,Res,FelTol,L,LMAX,HO_bml,mu,beta,RX enddo !$OMP END PARALLEL DO - call Ewald_k_Space(Coulomb_Pot_k,Coulomb_Force_k,RX,RY,RZ,LBox,dq_v,Nr_atoms,Coulomb_acc,TIMERATIO,Max_Nr_Neigh) - Coulomb_Pot_dq_v = Coulomb_Pot_Real+Coulomb_Pot_k + ! call Ewald_k_Space(Coulomb_Pot_k,Coulomb_Force_k,RX,RY,RZ,LBox,dq_v,Nr_atoms,Coulomb_acc,TIMERATIO,Max_Nr_Neigh) + ! Coulomb_Pot_dq_v = Coulomb_Pot_Real+Coulomb_Pot_k row1 = 0.0_dp do J = 1,HDIM @@ -433,41 +438,43 @@ end subroutine prg_kernel_multirank !! \param Nr_elem Number of elements in Hubbard list. subroutine prg_full_kernel_latte(KK,DO_bml,mu0,RXYZ,Box,Hubbard_U, & Element_Pointer, Nr_atoms,HDIM, Max_Nr_Neigh,Coulomb_acc, & - Hinxlist,S_bml,Z_bml,Inv_bml,D1_bml,H1_bml,HO_bml,Y_bml,X_bml, & + Hinxlist,S_bml,Z_bml,Inv_bml,D1_bml,H1_bml,HO_bml, & Nocc,m_rec,threshold,beta,Nr_elem) use bml implicit none integer, parameter :: PREC = 8, dp = kind(1.0d0) - integer, intent(in) :: Nr_atoms, Nr_elem,HDIM, Nocc, Max_Nr_Neigh,m_rec + integer, intent(in) :: Nr_atoms, Nr_elem,HDIM, Max_Nr_Neigh,m_rec real(PREC), parameter :: ONE = 1.D0, TWO = 2.D0, ZERO = 0.D0 real(PREC), parameter :: kB = 8.61739d-5 ! eV/K, kB = 6.33366256e-6 Ry/K, kB = 3.166811429e-6 Ha/K - real(PREC), intent(in) :: Coulomb_acc, threshold,beta + real(PREC), intent(in) :: Coulomb_acc, threshold,beta,Nocc real(PREC) :: v(Nr_atoms) real(PREC), intent(in) :: RXYZ(3,Nr_atoms),Box(3,3) integer, intent(in) :: Hinxlist(HDIM) real(PREC), intent(in) :: Hubbard_U(Nr_elem) type(bml_matrix_t), intent(in) :: Inv_bml(m_rec) type(bml_matrix_t), intent(in) :: S_bml,Z_bml,HO_bml - type(bml_matrix_t),intent(inout) :: H1_bml,DO_bml,D1_bml,Y_bml,X_bml + type(bml_matrix_t),intent(inout) :: H1_bml,DO_bml,D1_bml real(PREC), intent(inout) :: mu0 integer, intent(in) :: Element_Pointer(Nr_atoms) real(PREC) :: Coulomb_Pot_Real(Nr_atoms), Coulomb_Pot(Nr_atoms) real(PREC) :: Coulomb_Pot_Real_I, Coulomb_Pot_k(Nr_atoms) real(PREC) :: Coulomb_Pot_Real_dq_v(Nr_atoms), Coulomb_Pot_dq_v(Nr_atoms) real(PREC) :: dq_v(Nr_atoms) - real(PREC) :: dq_dv(Nr_atoms), err,tol + real(PREC) :: dq_dv(Nr_atoms), err,tol,start,ewaldk,ewaldr,ewaldkacc,ewaldracc,response,respacc real(PREC), intent(out) :: KK(Nr_atoms,Nr_atoms) integer :: I,J,K, ITER, mm,It,N,MN real(PREC),allocatable :: row1(:),row2(:),JJ(:,:) - type(bml_matrix_t) :: ZT_bml, R_bml, JJI_bml, JJ_bml, tmp_bml + type(bml_matrix_t) :: ZT_bml, R_bml, JJI_bml, JJ_bml, tmp_bml, X_bml, Y_bml character(20) :: bml_type bml_type = bml_get_type(HO_bml) N = bml_get_N(HO_bml) MN = bml_get_M(HO_bml) call bml_zero_matrix(bml_type,bml_element_real,dp,N,MN,ZT_bml) + call bml_zero_matrix(bml_type,bml_element_real,dp,N,MN,X_bml) + call bml_zero_matrix(bml_type,bml_element_real,dp,N,MN,Y_bml) call bml_transpose(Z_bml,ZT_bml) allocate(row1(HDIM)); allocate(row2(HDIM)); allocate(JJ(Nr_atoms,Nr_atoms)) Coulomb_Pot_dq_v = ZERO @@ -476,7 +483,11 @@ subroutine prg_full_kernel_latte(KK,DO_bml,mu0,RXYZ,Box,Hubbard_U, & JJ = ZERO KK = ZERO + write(*,*) 'Beginning response calculations' + ewaldracc = 0.0; ewaldkacc = 0.0; respacc = 0.0; do J = 1,Nr_atoms + write(*,*) 'J =',J + call cpu_time(start) dq_v(J) = ONE !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(I,Coulomb_Pot_Real_I) do I = 1,Nr_atoms @@ -485,9 +496,12 @@ subroutine prg_full_kernel_latte(KK,DO_bml,mu0,RXYZ,Box,Hubbard_U, & Coulomb_Pot_Real(I) = Coulomb_Pot_Real_I enddo !$OMP END PARALLEL DO - call Ewald_k_Space_latte(Coulomb_Pot_k,RXYZ,Box,dq_v,Nr_atoms,Coulomb_acc, & - Max_Nr_Neigh) + call cpu_time(ewaldr) + ewaldracc = ewaldracc + ewaldr-start + call Ewald_k_Space_latte_single(Coulomb_Pot_k,J,RXYZ,Box,dq_v,Nr_atoms,Coulomb_acc) Coulomb_Pot_dq_v = Coulomb_Pot_Real+Coulomb_Pot_k + call cpu_time(ewaldk) + ewaldkacc = ewaldkacc + ewaldk-ewaldr call bml_deallocate(H1_bml) call bml_zero_matrix(bml_type,bml_element_real,dp,N,MN,H1_bml) @@ -512,7 +526,9 @@ subroutine prg_full_kernel_latte(KK,DO_bml,mu0,RXYZ,Box,Hubbard_U, & call bml_multiply(X_bml,Z_bml,H1_bml,1.0_dp,0.0_dp,threshold) call prg_implicit_fermi_first_order_response(HO_bml,H1_bml,DO_bml,D1_bml,Inv_bml, & - m_rec,mu0,beta,real(nocc,PREC),threshold) + m_rec,mu0,beta,Nocc,threshold) + call cpu_time(response) + respacc = respacc + response-ewaldk call bml_multiply(Z_bml,D1_bml,Y_bml,2.0_dp,0.0_dp,threshold) call bml_multiply(Y_bml,ZT_bml,D1_bml,1.0_dp,0.0_dp,threshold) @@ -540,10 +556,15 @@ subroutine prg_full_kernel_latte(KK,DO_bml,mu0,RXYZ,Box,Hubbard_U, & JJ(I,I) = JJ(I,I) - ONE enddo + write(*,*) 'ewaldracc =', ewaldracc + write(*,*) 'ewaldkacc =', ewaldkacc + write(*,*) 'implcit response time =', respacc call Invert(JJ,KK,Nr_atoms) deallocate(row1); deallocate(row2); deallocate(JJ) call bml_deallocate(ZT_bml) + call bml_deallocate(X_bml) + call bml_deallocate(Y_bml) end subroutine prg_full_kernel_latte @@ -627,8 +648,8 @@ subroutine prg_full_kernel(KK,DO_bml,mu0,RX,RY,RZ,LBox,Hubbard_U,Element_Type, & Coulomb_Pot_Real(I) = Coulomb_Pot_Real_I enddo !$OMP END PARALLEL DO - call Ewald_k_Space(Coulomb_Pot_k,Coulomb_Force_k,RX,RY,RZ,LBox,dq_v,Nr_atoms,Coulomb_acc, & - TIMERATIO,Max_Nr_Neigh) + ! call Ewald_k_Space(Coulomb_Pot_k,Coulomb_Force_k,RX,RY,RZ,LBox,dq_v,Nr_atoms,Coulomb_acc, & + ! TIMERATIO,Max_Nr_Neigh) Coulomb_Pot_dq_v = Coulomb_Pot_Real+Coulomb_Pot_k diagonal = 0.0_dp @@ -691,9 +712,9 @@ subroutine prg_kernel_matrix_multirank(KRes,KK0_bml,Res,FelTol,L,LMAX,HO_bml,mu, !! KRes rank-L approximation of (K0*J)^(-1)*K0*(q[n]-n) with (K0*J)^(-1) as in Eq. (41) in Ref. [*] implicit none - integer, intent(in) :: Nr_atoms, HDIM, Nocc,Max_Nr_Neigh,m_rec + integer, intent(in) :: Nr_atoms, HDIM, Max_Nr_Neigh,m_rec real(dp), parameter :: ONE = 1.D0, TWO = 2.D0, ZERO = 0.D0 - real(dp), intent(in) :: Coulomb_acc, TIMERATIO, FelTol, beta, mu + real(dp), intent(in) :: Coulomb_acc, TIMERATIO, FelTol, beta, mu,Nocc real(dp), intent(in) :: RX(Nr_atoms), RY(Nr_atoms), RZ(Nr_atoms), LBox(3) real(dp), intent(in) :: Res(Nr_atoms) integer, intent(in) :: H_INDEX_START(Nr_atoms), H_INDEX_END(Nr_atoms) @@ -745,8 +766,8 @@ subroutine prg_kernel_matrix_multirank(KRes,KK0_bml,Res,FelTol,L,LMAX,HO_bml,mu, enddo !$OMP END PARALLEL DO - call Ewald_k_Space(Coulomb_Pot_k,Coulomb_Force_k,RX,RY,RZ,LBox,dq_v,Nr_atoms,Coulomb_acc,TIMERATIO,Max_Nr_Neigh) - Coulomb_Pot_dq_v = Coulomb_Pot_Real+Coulomb_Pot_k + ! call Ewald_k_Space(Coulomb_Pot_k,Coulomb_Force_k,RX,RY,RZ,LBox,dq_v,Nr_atoms,Coulomb_acc,TIMERATIO,Max_Nr_Neigh) + ! Coulomb_Pot_dq_v = Coulomb_Pot_Real+Coulomb_Pot_k row1 = 0.0_dp do J = 1,HDIM @@ -771,7 +792,7 @@ subroutine prg_kernel_matrix_multirank(KRes,KK0_bml,Res,FelTol,L,LMAX,HO_bml,mu, call bml_multiply(D1_bml,Z_bml,H1_bml,1.0_dp,0.0_dp,threshold) call prg_implicit_fermi_first_order_response(HO_bml,H1_bml,DO_bml,D1_bml,Inv_bml, & - m_rec, mu, beta, real(nocc,dp), threshold) + m_rec, mu, beta, Nocc, threshold) call bml_transpose(Z_bml,X_bml) call bml_multiply(Z_bml,D1_bml,Y_bml,2.0_dp,0.0_dp,threshold) From 02e93a6c87bbfaa9fdfd775ee5c4b77c10f0f0fd Mon Sep 17 00:00:00 2001 From: Linnea Andersson Date: Fri, 16 Jul 2021 18:23:21 +0200 Subject: [PATCH 04/10] Fixed test --- tests/src/main.F90 | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/src/main.F90 b/tests/src/main.F90 index 3ab0354f..530a3093 100644 --- a/tests/src/main.F90 +++ b/tests/src/main.F90 @@ -231,11 +231,10 @@ program main call bml_read_matrix(ham_bml,'hamiltonian_ortho.mtx') call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,rho1_bml) - call prg_test_density_matrix(ham_bml,rho1_bml,beta,mu,nocc,1,1e-4_dp,threshold) - write(*,*) mu + mu = 0.2_dp call prg_implicit_fermi_save_inverse(inv_bml,ham_bml,rho_bml,norecs,nocc,mu,beta,1e-4_dp, threshold, 1e-5_dp, 1,occiter) - + call prg_test_density_matrix(ham_bml,rho1_bml,beta,mu,nocc,1,1e-4_dp,threshold) write(*,*) mu call bml_scale(0.5_dp,rho_bml) call bml_add(rho1_bml,rho_bml,1.0_dp,-1.0_dp,threshold) From ce0e474cf5151279226874e13ac4d791a444218a Mon Sep 17 00:00:00 2001 From: Linnea Andersson Date: Mon, 11 Jan 2021 15:49:53 +0100 Subject: [PATCH 05/10] Added kernel routines for XL-BOMD --- src/CMakeLists.txt | 3 + src/prg_ewald_mod.F90 | 751 +++++++++++++++++++++++++++++ src/prg_implicit_fermi_mod.F90 | 326 +++++++++++-- src/prg_xlbokernel_mod.F90 | 837 +++++++++++++++++++++++++++++++++ 4 files changed, 1880 insertions(+), 37 deletions(-) create mode 100644 src/prg_ewald_mod.F90 create mode 100644 src/prg_xlbokernel_mod.F90 diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index e0f63b54..9c30e5be 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -18,6 +18,7 @@ add_library(progress prg_chebyshev_mod.F90 prg_densitymatrix_mod.F90 prg_dos_mod.F90 + prg_ewald_mod.F90 prg_extras_mod.F90 prg_genz_mod.F90 prg_graph_mod.F90 @@ -46,6 +47,7 @@ add_library(progress prg_subgraphloop_mod.F90 prg_system_mod.F90 prg_timer_mod.F90 + prg_xlbokernel_mod.F90 prg_xlbo_mod.F90) #prg_xlkernel_mod.F90) @@ -122,6 +124,7 @@ install(FILES ${CMAKE_CURRENT_BINARY_DIR}/prg_system_mod.mod ${CMAKE_CURRENT_BINARY_DIR}/prg_timer_mod.mod ${CMAKE_CURRENT_BINARY_DIR}/prg_xlbo_mod.mod + ${CMAKE_CURRENT_BINARY_DIR}/prg_xlbokernel_mod.mod # ${CMAKE_CURRENT_BINARY_DIR}/prg_xlkernel_mod.mod ${CMAKE_CURRENT_BINARY_DIR}/slaterkosterforce_latte_mod.mod ${CMAKE_CURRENT_BINARY_DIR}/tbparams_latte_mod.mod diff --git a/src/prg_ewald_mod.F90 b/src/prg_ewald_mod.F90 new file mode 100644 index 00000000..5a032412 --- /dev/null +++ b/src/prg_ewald_mod.F90 @@ -0,0 +1,751 @@ +! Ewald sum routines for kernel calculation +module prg_ewald_mod + + use bml + use prg_timer_mod + use prg_parallel_mod + + implicit none + + private !Everything is private by default + + integer, parameter :: dp = kind(1.0d0) + + public :: Ewald_Real_Space_Single + public :: Ewald_Real_Space_Single_latte + public :: Ewald_Real_Space + public :: Ewald_Real_Space_latte + public :: Ewald_k_space + public :: Ewald_k_space_latte + +contains + +!> Find Coulomb potential on site I from single charge at site J +subroutine Ewald_Real_Space_Single_latte(COULOMBV,I,RXYZ,Box,Nr_elem, & + DELTAQ,J,U,Element_Pointer,Nr_atoms,COULACC,HDIM,Max_Nr_Neigh) + +implicit none + +integer, parameter :: PREC = 8 +real(PREC), parameter :: ONE = 1.D0, TWO = 2.D0, ZERO = 0.D0, SIX = 6.D0, THREE = 3.D0 +real(PREC), parameter :: FOURTYEIGHT = 48.D0, ELEVEN = 11.D0, SIXTEEN = 16.D0 +real(PREC), parameter :: pi = 3.14159265358979323846264D0 +real(PREC), parameter :: SQRTPI = 1.772453850905516D0 +integer, intent(in) :: Nr_atoms, Nr_elem, HDIM, Max_Nr_Neigh, I, J, Element_Pointer(Nr_atoms) +real(PREC), intent(in) :: COULACC, DELTAQ(Nr_atoms) +real(PREC) :: TFACT, RELPERM, KECONST +real(PREC), intent(in) :: RXYZ(3,Nr_atoms), Box(3,3) +real(PREC), intent(in) :: U(Nr_elem) +real(PREC) :: COULCUT, COULCUT2 +real(PREC), intent(out) :: COULOMBV +real(PREC) :: Ra(3), Rb(3), dR, Rab(3) +real(PREC) :: COULVOL, SQRTX, CALPHA, DC(3), MAGR, MAGR2, Z +real(PREC) :: CALPHA2, TI,TI2,TI3,TI4,TI6,SSA,SSB,SSC,SSD,SSE +real(PREC) :: NUMREP_ERFC, CA, FORCE, EXPTI, EXPTJ +real(PREC) :: TJ,TJ2,TJ3,TJ4,TJ6,TI2MTJ2A,SA,SB,SC,SD,SE,SF +real(PREC) :: TI2MTJ2, TI2MTI2, TJ2MTI2 + +integer :: K, ccnt,l,m,n + +COULVOL = Box(1,1)*Box(2,2)*Box(3,3) +SQRTX = sqrt(-log(COULACC)) + +ccnt = 0 +COULCUT = 12.D0 +CALPHA = SQRTX/COULCUT +COULCUT2 = COULCUT*COULCUT +CALPHA2 = CALPHA*CALPHA + +RELPERM = ONE +KECONST = 14.3996437701414D0*RELPERM +TFACT = 16.0D0/(5.0D0*KECONST) + +COULOMBV = ZERO + +TI = TFACT*U(Element_Pointer(I)) +TI2 = TI*TI +TI3 = TI2*TI +TI4 = TI2*TI2 +TI6 = TI4*TI2 + +SSA = TI +SSB = TI3/48.D0 +SSC = 3.D0*TI2/16.D0 +SSD = 11.D0*TI/16.D0 +SSE = 1.D0 + +Ra(1) = RXYZ(1,I) +Ra(2) = RXYZ(2,I) +Ra(3) = RXYZ(3,I) + + do k = -1,1 + do m = -1,1 + do l = -1,1 + + Rb(1) = RXYZ(1,J)+k*box(1,1) + Rb(2) = RXYZ(2,J)+m*box(2,2) + Rb(3) = RXYZ(3,J)+l*box(3,3) + Rab = Rb-Ra ! OBS b - a !!! + dR = norm2(Rab) + MAGR = dR + MAGR2 = dR*dR + + if ((dR <= COULCUT).and.(dR > 1e-12)) then + + TJ = TFACT*U(Element_Pointer(J)) + DC = Rab/dR + + Z = abs(CALPHA*MAGR) + NUMREP_ERFC = erfc(Z) + + CA = NUMREP_ERFC/MAGR + COULOMBV = COULOMBV + DELTAQ(J)*CA + ccnt = ccnt + 1 + CA = CA + TWO*CALPHA*exp( -CALPHA2*MAGR2 )/SQRTPI + EXPTI = exp(-TI*MAGR ) + + if (Element_Pointer(I).eq.Element_Pointer(J)) then + COULOMBV = COULOMBV - DELTAQ(J)*EXPTI*(SSB*MAGR2 + SSC*MAGR + SSD + SSE/MAGR) + ccnt = ccnt + 1 + else + TJ2 = TJ*TJ + TJ3 = TJ2*TJ + TJ4 = TJ2*TJ2 + TJ6 = TJ4*TJ2 + EXPTJ = exp( -TJ*MAGR ) + TI2MTJ2 = TI2 - TJ2 + TJ2MTI2 = -TI2MTJ2 + SA = TI + SB = TJ4*TI/(TWO*TI2MTJ2*TI2MTJ2) + SC = (TJ6 - THREE*TJ4*TI2)/(TI2MTJ2*TI2MTJ2*TI2MTJ2) + SD = TJ + SE = TI4*TJ/(TWO * TJ2MTI2 * TJ2MTI2) + SF = (TI6 - THREE*TI4*TJ2)/(TJ2MTI2*TJ2MTI2*TJ2MTI2) + + COULOMBV = COULOMBV - (DELTAQ(J)*(EXPTI*(SB - (SC/MAGR)) + EXPTJ*(SE - (SF/MAGR)))) + endif + endif + enddo + enddo + enddo + +COULOMBV = KECONST*COULOMBV + +end subroutine Ewald_Real_Space_Single_latte + +subroutine Ewald_Real_Space_Single(COULOMBV,FCOUL,I,RX,RY,RZ,LBox, & + DELTAQ,J,U,Element_Type,Nr_atoms,COULACC,TIMERATIO,HDIM,Max_Nr_Neigh) + + +implicit none + +integer, parameter :: PREC = 8 +real(PREC), parameter :: ONE = 1.D0, TWO = 2.D0, ZERO = 0.D0, SIX = 6.D0, THREE = 3.D0 +real(PREC), parameter :: FOURTYEIGHT = 48.D0, ELEVEN = 11.D0, SIXTEEN = 16.D0 +real(PREC), parameter :: pi = 3.14159265358979323846264D0 +real(PREC), parameter :: SQRTPI = 1.772453850905516D0 +integer, intent(in) :: Nr_atoms, HDIM, Max_Nr_Neigh, I, J +real(PREC), intent(in) :: COULACC, TIMERATIO,DELTAQ(Nr_atoms) +real(PREC) :: TFACT, RELPERM, KECONST +real(PREC), intent(in) :: RX(Nr_atoms), RY(Nr_atoms), RZ(Nr_atoms), LBox(3) +real(PREC), intent(in) :: U(Nr_atoms) +real(PREC) :: COULCUT, COULCUT2 +character(10), intent(in) :: Element_Type(Nr_atoms) +real(PREC), intent(out) :: COULOMBV, FCOUL(3) +real(PREC) :: Ra(3), Rb(3), dR, Rab(3) +real(PREC) :: COULVOL, SQRTX, CALPHA, DC(3), MAGR, MAGR2, Z +real(PREC) :: CALPHA2, TI,TI2,TI3,TI4,TI6,SSA,SSB,SSC,SSD,SSE +real(PREC) :: NUMREP_ERFC, CA, FORCE, EXPTI, EXPTJ +real(PREC) :: TJ,TJ2,TJ3,TJ4,TJ6,TI2MTJ2A,SA,SB,SC,SD,SE,SF +real(PREC) :: TI2MTJ2, TI2MTI2, TJ2MTI2 + +integer :: K, ccnt,l,m,n + +COULVOL = LBox(1)*LBox(2)*LBox(3) +SQRTX = sqrt(-log(COULACC)) + +ccnt = 0 +COULCUT = 12.D0 +CALPHA = SQRTX/COULCUT +COULCUT2 = COULCUT*COULCUT +CALPHA2 = CALPHA*CALPHA + +RELPERM = ONE +KECONST = 14.3996437701414D0*RELPERM +TFACT = 16.0D0/(5.0D0*KECONST) + +FCOUL = ZERO +COULOMBV = ZERO + +TI = TFACT*U(I) +TI2 = TI*TI +TI3 = TI2*TI +TI4 = TI2*TI2 +TI6 = TI4*TI2 + +SSA = TI +SSB = TI3/48.D0 +SSC = 3.D0*TI2/16.D0 +SSD = 11.D0*TI/16.D0 +SSE = 1.D0 + +Ra(1) = RX(I) +Ra(2) = RY(I) +Ra(3) = RZ(I) + + do k = -1,1 + do m = -1,1 + do l = -1,1 + + Rb(1) = RX(J)+k*Lbox(1) + Rb(2) = RY(J)+m*Lbox(2) + Rb(3) = RZ(J)+l*Lbox(3) + Rab = Rb-Ra ! OBS b - a !!! + dR = norm2(Rab) + MAGR = dR + MAGR2 = dR*dR + + if ((dR <= COULCUT).and.(dR > 1e-12)) then + + TJ = TFACT*U(J) + DC = Rab/dR + + Z = abs(CALPHA*MAGR) + NUMREP_ERFC = erfc(Z) + + CA = NUMREP_ERFC/MAGR + COULOMBV = COULOMBV + DELTAQ(J)*CA + ccnt = ccnt + 1 + CA = CA + TWO*CALPHA*exp( -CALPHA2*MAGR2 )/SQRTPI + FORCE = -KECONST*DELTAQ(I)*DELTAQ(J)*CA/MAGR + EXPTI = exp(-TI*MAGR ) + + if (Element_Type(I).eq.Element_Type(J)) then + COULOMBV = COULOMBV - DELTAQ(J)*EXPTI*(SSB*MAGR2 + SSC*MAGR + SSD + SSE/MAGR) + ccnt = ccnt + 1 + FORCE = FORCE + (KECONST*DELTAQ(I)*DELTAQ(J)*EXPTI)*((SSE/MAGR2 - TWO*SSB*MAGR - SSC) & + + SSA*(SSB*MAGR2 + SSC*MAGR + SSD + SSE/MAGR)) + else + TJ2 = TJ*TJ + TJ3 = TJ2*TJ + TJ4 = TJ2*TJ2 + TJ6 = TJ4*TJ2 + EXPTJ = exp( -TJ*MAGR ) + TI2MTJ2 = TI2 - TJ2 + TJ2MTI2 = -TI2MTJ2 + SA = TI + SB = TJ4*TI/(TWO*TI2MTJ2*TI2MTJ2) + SC = (TJ6 - THREE*TJ4*TI2)/(TI2MTJ2*TI2MTJ2*TI2MTJ2) + SD = TJ + SE = TI4*TJ/(TWO * TJ2MTI2 * TJ2MTI2) + SF = (TI6 - THREE*TI4*TJ2)/(TJ2MTI2*TJ2MTI2*TJ2MTI2) + + COULOMBV = COULOMBV - (DELTAQ(J)*(EXPTI*(SB - (SC/MAGR)) + EXPTJ*(SE - (SF/MAGR)))) + FORCE = FORCE + KECONST*DELTAQ(I)*DELTAQ(J)*((EXPTI*(SA*(SB - (SC/MAGR)) - (SC/MAGR2))) & + + (EXPTJ*(SD*(SE - (SF/MAGR)) - (SF/MAGR2)))) + endif + + FCOUL(1) = FCOUL(1) + DC(1)*FORCE + FCOUL(2) = FCOUL(2) + DC(2)*FORCE + FCOUL(3) = FCOUL(3) + DC(3)*FORCE + endif + enddo + enddo + enddo + +COULOMBV = KECONST*COULOMBV + +end subroutine Ewald_Real_Space_Single + +subroutine Ewald_Real_Space_latte(COULOMBV,I,RXYZ,Box, & + DELTAQ,U,Element_Pointer,Nr_atoms,COULACC,nebcoul,totnebcoul,HDIM,Max_Nr_Neigh,Nr_Elem) + +implicit none + +integer, parameter :: PREC = 8 +real(PREC), parameter :: ONE = 1.D0, TWO = 2.D0, ZERO = 0.D0, SIX = 6.D0, THREE = 3.D0 +real(PREC), parameter :: FOURTYEIGHT = 48.D0, ELEVEN = 11.D0, SIXTEEN = 16.D0 +real(PREC), parameter :: pi = 3.14159265358979323846264D0 +real(PREC), parameter :: SQRTPI = 1.772453850905516D0 +integer, intent(in) :: Nr_atoms, HDIM, Max_Nr_Neigh, I, Nr_Elem +real(PREC), intent(in) :: COULACC +real(PREC) :: TFACT, RELPERM, KECONST +real(PREC), intent(in) :: RXYZ(3,Nr_atoms), Box(3,3), DELTAQ(Nr_atoms) +real(PREC), intent(in) :: U(Nr_elem) +real(PREC) :: COULCUT, COULCUT2 +integer, intent(in) :: Element_Pointer(Nr_atoms) +integer, intent(in) :: totnebcoul(Nr_atoms), nebcoul(4,Max_Nr_Neigh,Nr_atoms) +real(PREC), intent(out) :: COULOMBV +real(PREC) :: Ra(3), Rb(3), dR, Rab(3) +real(PREC) :: COULVOL, SQRTX, CALPHA, DC(3), MAGR, MAGR2, Z +real(PREC) :: CALPHA2, TI,TI2,TI3,TI4,TI6,SSA,SSB,SSC,SSD,SSE +real(PREC) :: NUMREP_ERFC, CA, FORCE, EXPTI, EXPTJ +real(PREC) :: TJ,TJ2,TJ3,TJ4,TJ6,TI2MTJ2A,SA,SB,SC,SD,SE,SF +real(PREC) :: TI2MTJ2, TI2MTI2, TJ2MTI2 +integer :: J,K, ccnt, newj, PBCI,PBCJ,PBCK + +COULVOL = Box(1,1)*Box(2,2)*Box(3,3) +SQRTX = sqrt(-log(COULACC)) + +ccnt = 0 +COULCUT = 12.D0 +CALPHA = SQRTX/COULCUT +COULCUT2 = COULCUT*COULCUT +CALPHA2 = CALPHA*CALPHA + +RELPERM = ONE +KECONST = 14.3996437701414D0*RELPERM +TFACT = 16.0D0/(5.0D0*KECONST) + +COULOMBV = ZERO + +TI = TFACT*U(Element_Pointer(I)) +TI2 = TI*TI +TI3 = TI2*TI +TI4 = TI2*TI2 +TI6 = TI4*TI2 + +SSA = TI +SSB = TI3/48.D0 +SSC = 3.D0*TI2/16.D0 +SSD = 11.D0*TI/16.D0 +SSE = 1.D0 + +Ra(1) = RXYZ(1,I) +Ra(2) = RXYZ(2,I) +Ra(3) = RXYZ(3,I) + +do newj = 1,totnebcoul(I) + J = NEBCOUL(1, NEWJ, I) + PBCI = NEBCOUL(2, NEWJ, I) + PBCJ = NEBCOUL(3, NEWJ, I) + PBCK = NEBCOUL(4, NEWJ, I) + Rb(1) = RXYZ(1,J) + REAL(PBCI)*BOX(1,1) + REAL(PBCJ)*BOX(2,1) + & + REAL(PBCK)*BOX(3,1) + + Rb(2) = RXYZ(2,J) + REAL(PBCI)*BOX(1,2) + REAL(PBCJ)*BOX(2,2) + & + REAL(PBCK)*BOX(3,2) + + Rb(3) = RXYZ(3,J) + REAL(PBCI)*BOX(1,3) + REAL(PBCJ)*BOX(2,3) + & + REAL(PBCK)*BOX(3,3) + Rab = Rb-Ra ! OBS b - a !!! + dR = norm2(Rab) + MAGR = dR + MAGR2 = dR*dR + + if ((dR <= COULCUT).and.(dR > 1e-12)) then + + TJ = TFACT*U(Element_Pointer(J)) + DC = Rab/dR + + Z = abs(CALPHA*MAGR) + NUMREP_ERFC = erfc(Z) + + CA = NUMREP_ERFC/MAGR + COULOMBV = COULOMBV + DELTAQ(J)*CA + ccnt = ccnt + 1 + CA = CA + TWO*CALPHA*exp( -CALPHA2*MAGR2 )/SQRTPI + EXPTI = exp(-TI*MAGR ) + + if (Element_Pointer(I).eq.Element_Pointer(J)) then + COULOMBV = COULOMBV - DELTAQ(J)*EXPTI*(SSB*MAGR2 + SSC*MAGR + SSD + SSE/MAGR) + ccnt = ccnt + 1 + else + TJ2 = TJ*TJ + TJ3 = TJ2*TJ + TJ4 = TJ2*TJ2 + TJ6 = TJ4*TJ2 + EXPTJ = exp( -TJ*MAGR ) + TI2MTJ2 = TI2 - TJ2 + TJ2MTI2 = -TI2MTJ2 + SA = TI + SB = TJ4*TI/(TWO*TI2MTJ2*TI2MTJ2) + SC = (TJ6 - THREE*TJ4*TI2)/(TI2MTJ2*TI2MTJ2*TI2MTJ2) + SD = TJ + SE = TI4*TJ/(TWO * TJ2MTI2 * TJ2MTI2) + SF = (TI6 - THREE*TI4*TJ2)/(TJ2MTI2*TJ2MTI2*TJ2MTI2) + + COULOMBV = COULOMBV - (DELTAQ(J)*(EXPTI*(SB - (SC/MAGR)) + EXPTJ*(SE - (SF/MAGR)))) + endif + + endif +enddo +COULOMBV = KECONST*COULOMBV + +end subroutine Ewald_Real_Space_latte + +subroutine Ewald_Real_Space(COULOMBV,FCOUL,I,RX,RY,RZ,LBox, & + DELTAQ,U,Element_Type,Nr_atoms,COULACC,TIMERATIO,nnRx,nnRy,nnRz,nrnnlist,nnType,HDIM,Max_Nr_Neigh) + +implicit none + +integer, parameter :: PREC = 8 +real(PREC), parameter :: ONE = 1.D0, TWO = 2.D0, ZERO = 0.D0, SIX = 6.D0, THREE = 3.D0 +real(PREC), parameter :: FOURTYEIGHT = 48.D0, ELEVEN = 11.D0, SIXTEEN = 16.D0 +real(PREC), parameter :: pi = 3.14159265358979323846264D0 +real(PREC), parameter :: SQRTPI = 1.772453850905516D0 +integer, intent(in) :: Nr_atoms, HDIM, Max_Nr_Neigh, I +real(PREC), intent(in) :: COULACC, TIMERATIO +real(PREC) :: TFACT, RELPERM, KECONST +real(PREC), intent(in) :: RX(Nr_atoms), RY(Nr_atoms), RZ(Nr_atoms), LBox(3), DELTAQ(Nr_atoms) +real(PREC), intent(in) :: U(Nr_atoms) +real(PREC) :: COULCUT, COULCUT2 +character(10), intent(in) :: Element_Type(Nr_atoms) +integer, intent(in) :: nrnnlist(Nr_atoms), nnType(Nr_atoms,Max_Nr_Neigh) +real(PREC), intent(in) :: nnRx(Nr_atoms,Max_Nr_Neigh) +real(PREC), intent(in) :: nnRy(Nr_atoms,Max_Nr_Neigh), nnRz(Nr_atoms,Max_Nr_Neigh) +real(PREC), intent(out) :: COULOMBV, FCOUL(3) +real(PREC) :: Ra(3), Rb(3), dR, Rab(3) +real(PREC) :: COULVOL, SQRTX, CALPHA, DC(3), MAGR, MAGR2, Z +real(PREC) :: CALPHA2, TI,TI2,TI3,TI4,TI6,SSA,SSB,SSC,SSD,SSE +real(PREC) :: NUMREP_ERFC, CA, FORCE, EXPTI, EXPTJ +real(PREC) :: TJ,TJ2,TJ3,TJ4,TJ6,TI2MTJ2A,SA,SB,SC,SD,SE,SF +real(PREC) :: TI2MTJ2, TI2MTI2, TJ2MTI2 +integer :: J,K, ccnt, nnI + +COULVOL = LBox(1)*LBox(2)*LBox(3) +SQRTX = sqrt(-log(COULACC)) + +ccnt = 0 +COULCUT = 12.D0 +CALPHA = SQRTX/COULCUT +COULCUT2 = COULCUT*COULCUT +CALPHA2 = CALPHA*CALPHA + +RELPERM = ONE +KECONST = 14.3996437701414D0*RELPERM +TFACT = 16.0D0/(5.0D0*KECONST) + +FCOUL = ZERO +COULOMBV = ZERO + +TI = TFACT*U(I) +TI2 = TI*TI +TI3 = TI2*TI +TI4 = TI2*TI2 +TI6 = TI4*TI2 + +SSA = TI +SSB = TI3/48.D0 +SSC = 3.D0*TI2/16.D0 +SSD = 11.D0*TI/16.D0 +SSE = 1.D0 + +Ra(1) = RX(I) +Ra(2) = RY(I) +Ra(3) = RZ(I) + +do nnI = 1,nrnnlist(I) + Rb(1) = nnRx(I,nnI) + Rb(2) = nnRy(I,nnI) + Rb(3) = nnRz(I,nnI) + J = nnType(I,nnI) + Rab = Rb-Ra ! OBS b - a !!! + dR = norm2(Rab) + MAGR = dR + MAGR2 = dR*dR + + if ((dR <= COULCUT).and.(dR > 1e-12)) then + + TJ = TFACT*U(J) + DC = Rab/dR + + ! Not Using Numerical Recipes ERFC + Z = abs(CALPHA*MAGR) + NUMREP_ERFC = erfc(Z) + + CA = NUMREP_ERFC/MAGR + COULOMBV = COULOMBV + DELTAQ(J)*CA + ccnt = ccnt + 1 + !TEST(ccnt) = DELTAQ(J)*CA + CA = CA + TWO*CALPHA*exp( -CALPHA2*MAGR2 )/SQRTPI + FORCE = -KECONST*DELTAQ(I)*DELTAQ(J)*CA/MAGR + EXPTI = exp(-TI*MAGR ) + + if (Element_Type(I).eq.Element_Type(J)) then + COULOMBV = COULOMBV - DELTAQ(J)*EXPTI*(SSB*MAGR2 + SSC*MAGR + SSD + SSE/MAGR) + ccnt = ccnt + 1 + !TEST(ccnt) = - DELTAQ(J)*EXPTI*(SSB*MAGR2 + SSC*MAGR + SSD + SSE/MAGR) + FORCE = FORCE + (KECONST*DELTAQ(I)*DELTAQ(J)*EXPTI)*((SSE/MAGR2 - TWO*SSB*MAGR - SSC) & + + SSA*(SSB*MAGR2 + SSC*MAGR + SSD + SSE/MAGR)) + else + TJ2 = TJ*TJ + TJ3 = TJ2*TJ + TJ4 = TJ2*TJ2 + TJ6 = TJ4*TJ2 + EXPTJ = exp( -TJ*MAGR ) + TI2MTJ2 = TI2 - TJ2 + TJ2MTI2 = -TI2MTJ2 + SA = TI + SB = TJ4*TI/(TWO*TI2MTJ2*TI2MTJ2) + SC = (TJ6 - THREE*TJ4*TI2)/(TI2MTJ2*TI2MTJ2*TI2MTJ2) + SD = TJ + SE = TI4*TJ/(TWO * TJ2MTI2 * TJ2MTI2) + SF = (TI6 - THREE*TI4*TJ2)/(TJ2MTI2*TJ2MTI2*TJ2MTI2) + + COULOMBV = COULOMBV - (DELTAQ(J)*(EXPTI*(SB - (SC/MAGR)) + EXPTJ*(SE - (SF/MAGR)))) + FORCE = FORCE + KECONST*DELTAQ(I)*DELTAQ(J)*((EXPTI*(SA*(SB - (SC/MAGR)) - (SC/MAGR2))) & + + (EXPTJ*(SD*(SE - (SF/MAGR)) - (SF/MAGR2)))) + endif + + FCOUL(1) = FCOUL(1) + DC(1)*FORCE + FCOUL(2) = FCOUL(2) + DC(2)*FORCE + FCOUL(3) = FCOUL(3) + DC(3)*FORCE + endif +enddo +COULOMBV = KECONST*COULOMBV + +end subroutine Ewald_Real_Space + +subroutine Ewald_k_Space_latte(COULOMBV,RXYZ,Box,DELTAQ,Nr_atoms,COULACC,Max_Nr_Neigh) + +implicit none + +integer, parameter :: PREC = 8 +real(PREC), parameter :: ONE = 1.D0, TWO = 2.D0, ZERO = 0.D0, SIX = 6.D0, THREE = 3.D0, FOUR = 4.D0 +real(PREC), parameter :: FOURTYEIGHT = 48.D0, ELEVEN = 11.D0, SIXTEEN = 16.D0, EIGHT = 8.D0 +real(PREC), parameter :: pi = 3.14159265358979323846264D0 +real(PREC), parameter :: SQRTPI = 1.772453850905516D0 +integer, intent(in) :: Nr_atoms, Max_Nr_Neigh +real(PREC), intent(in) :: COULACC +real(PREC) :: KECONST, TFACT, RELPERM +real(PREC), intent(in) :: RXYZ(3,Nr_atoms), Box(3,3), DELTAQ(Nr_atoms) +real(PREC) :: COULCUT, COULCUT2 +real(PREC), intent(out) :: COULOMBV(Nr_atoms) +real(PREC) :: Ra(3), Rb(3), dR, Rab(3) +real(PREC) :: COULVOL, SQRTX, CALPHA, DC(3), MAGR, MAGR2, Z +real(PREC) :: CALPHA2, TI,TI2,TI3,TI4,TI6,SSA,SSB,SSC,SSD,SSE +real(PREC) :: CORRFACT,FOURCALPHA2, FORCE +real(PREC) :: RECIPVECS(3,3),SINLIST(Nr_atoms),COSLIST(Nr_Atoms) +real(PREC) :: K(3),L11,L12,L13,M21,M22,M23,K2,KCUTOFF,KCUTOFF2,PREFACTOR +real(PREC) :: PREVIR, COSSUM,SINSUM,DOT, KEPREF, COSSUM2, SINSUM2 + +integer :: I,J,L,M,N, ccnt, nnI, LMAX,MMAX,NMAX,NMIN,MMIN + +COULVOL = Box(1,1)*Box(2,2)*Box(3,3) +SQRTX = sqrt(-log(COULACC)) + +ccnt = 0 + +COULCUT = 12.0D0 +CALPHA = SQRTX/COULCUT + +COULCUT2 = COULCUT*COULCUT +KCUTOFF = TWO*CALPHA*SQRTX +KCUTOFF2 = KCUTOFF*KCUTOFF +CALPHA2 = CALPHA*CALPHA +FOURCALPHA2 = FOUR*CALPHA2 + +RECIPVECS = ZERO +RECIPVECS(1,1) = TWO*pi/Box(1,1) +RECIPVECS(2,2) = TWO*pi/Box(2,2) +RECIPVECS(3,3) = TWO*pi/Box(3,3) +LMAX = floor(KCUTOFF / sqrt(RECIPVECS(1,1)*RECIPVECS(1,1))) +MMAX = floor(KCUTOFF / sqrt(RECIPVECS(2,2)*RECIPVECS(2,2))) +NMAX = floor(KCUTOFF / sqrt(RECIPVECS(3,3)*RECIPVECS(3,3))) + +RELPERM = 1.D0 +KECONST = 14.3996437701414D0*RELPERM + +COULOMBV = ZERO +SINLIST = ZERO +COSLIST = ZERO + +do L = 0,LMAX + + if (L.eq.0) then + MMIN = 0 + else + MMIN = -MMAX + endif + + L11 = L*RECIPVECS(1,1) + L12 = L*RECIPVECS(1,2) + L13 = L*RECIPVECS(1,3) + + do M = MMIN,MMAX + + NMIN = -NMAX + if ((L==0).and.(M==0)) then + NMIN = 1 + endif + + M21 = L11 + M*RECIPVECS(2,1) + M22 = L12 + M*RECIPVECS(2,2) + M23 = L13 + M*RECIPVECS(2,3) + + do N = NMIN,NMAX + K(1) = M21 + N*RECIPVECS(3,1) + K(2) = M22 + N*RECIPVECS(3,2) + K(3) = M23 + N*RECIPVECS(3,3) + K2 = K(1)*K(1) + K(2)*K(2) + K(3)*K(3) + if (K2.le.KCUTOFF2) then + PREFACTOR = EIGHT*pi*exp(-K2/(4.D0*CALPHA2))/(COULVOL*K2) + PREVIR = (2.D0/K2) + (2.D0/(4.D0*CALPHA2)); + + COSSUM = 0.D0 + SINSUM = 0.D0 + + ! Doing the sin and cos sums + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(I,DOT) & + !$OMP REDUCTION(+:COSSUM) & + !$OMP REDUCTION(+:SINSUM) + do I = 1,Nr_atoms + DOT = K(1)*RXYZ(1,I) + K(2)*RXYZ(2,I) + K(3)*RXYZ(3,I) + ! We re-use these in the next loop... + SINLIST(I) = sin(DOT) + COSLIST(I) = cos(DOT) + COSSUM = COSSUM + DELTAQ(I)*COSLIST(I) + SINSUM = SINSUM + DELTAQ(I)*SINLIST(I) + enddo + !$OMP END PARALLEL DO + COSSUM2 = COSSUM*COSSUM + SINSUM2 = SINSUM*SINSUM + + ! Add up energy and force contributions + + KEPREF = KECONST*PREFACTOR + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(I) + do I = 1,Nr_atoms + COULOMBV(I) = COULOMBV(I) + KEPREF*(COSLIST(I)*COSSUM + SINLIST(I)*SINSUM) + enddo + !$OMP END PARALLEL DO + + KEPREF = KEPREF*(COSSUM2 + SINSUM2) + endif + enddo + enddo +enddo + +! Point self energy +CORRFACT = 2.D0*KECONST*CALPHA/SQRTPI; +COULOMBV = COULOMBV - CORRFACT*DELTAQ; + +end subroutine Ewald_k_Space_latte + +subroutine Ewald_k_Space(COULOMBV,FCOUL,RX,RY,RZ,LBox,DELTAQ,Nr_atoms,COULACC,TIMERATIO,Max_Nr_Neigh) + +implicit none + +integer, parameter :: PREC = 8 +real(PREC), parameter :: ONE = 1.D0, TWO = 2.D0, ZERO = 0.D0, SIX = 6.D0, THREE = 3.D0, FOUR = 4.D0 +real(PREC), parameter :: FOURTYEIGHT = 48.D0, ELEVEN = 11.D0, SIXTEEN = 16.D0, EIGHT = 8.D0 +real(PREC), parameter :: pi = 3.14159265358979323846264D0 +real(PREC), parameter :: SQRTPI = 1.772453850905516D0 +integer, intent(in) :: Nr_atoms, Max_Nr_Neigh +real(PREC), intent(in) :: COULACC, TIMERATIO +real(PREC) :: KECONST, TFACT, RELPERM +real(PREC), intent(in) :: RX(Nr_atoms), RY(Nr_atoms), RZ(Nr_atoms), LBox(3), DELTAQ(Nr_atoms) +real(PREC) :: COULCUT, COULCUT2 +real(PREC), intent(out) :: COULOMBV(Nr_atoms), FCOUL(3,Nr_atoms) +real(PREC) :: Ra(3), Rb(3), dR, Rab(3) +real(PREC) :: COULVOL, SQRTX, CALPHA, DC(3), MAGR, MAGR2, Z +real(PREC) :: CALPHA2, TI,TI2,TI3,TI4,TI6,SSA,SSB,SSC,SSD,SSE +real(PREC) :: CORRFACT,FOURCALPHA2, FORCE +real(PREC) :: RECIPVECS(3,3),SINLIST(Nr_atoms),COSLIST(Nr_Atoms) +real(PREC) :: K(3),L11,L12,L13,M21,M22,M23,K2,KCUTOFF,KCUTOFF2,PREFACTOR +real(PREC) :: PREVIR, COSSUM,SINSUM,DOT, KEPREF, COSSUM2, SINSUM2 + +integer :: I,J,L,M,N, ccnt, nnI, LMAX,MMAX,NMAX,NMIN,MMIN + +COULVOL = LBox(1)*LBox(2)*LBox(3) +SQRTX = sqrt(-log(COULACC)) + +ccnt = 0 + +COULCUT = 12.0D0 +CALPHA = SQRTX/COULCUT + +COULCUT2 = COULCUT*COULCUT +KCUTOFF = TWO*CALPHA*SQRTX +KCUTOFF2 = KCUTOFF*KCUTOFF +CALPHA2 = CALPHA*CALPHA +FOURCALPHA2 = FOUR*CALPHA2 + +RECIPVECS = ZERO +RECIPVECS(1,1) = TWO*pi/LBox(1) +RECIPVECS(2,2) = TWO*pi/LBox(2) +RECIPVECS(3,3) = TWO*pi/LBox(3) +LMAX = floor(KCUTOFF / sqrt(RECIPVECS(1,1)*RECIPVECS(1,1))) +MMAX = floor(KCUTOFF / sqrt(RECIPVECS(2,2)*RECIPVECS(2,2))) +NMAX = floor(KCUTOFF / sqrt(RECIPVECS(3,3)*RECIPVECS(3,3))) + +RELPERM = 1.D0 +KECONST = 14.3996437701414D0*RELPERM + +FCOUL = ZERO +COULOMBV = ZERO +SINLIST = ZERO +COSLIST = ZERO + +do L = 0,LMAX + + if (L.eq.0) then + MMIN = 0 + else + MMIN = -MMAX + endif + + L11 = L*RECIPVECS(1,1) + L12 = L*RECIPVECS(1,2) + L13 = L*RECIPVECS(1,3) + + do M = MMIN,MMAX + + NMIN = -NMAX + if ((L==0).and.(M==0)) then + NMIN = 1 + endif + + M21 = L11 + M*RECIPVECS(2,1) + M22 = L12 + M*RECIPVECS(2,2) + M23 = L13 + M*RECIPVECS(2,3) + + do N = NMIN,NMAX + K(1) = M21 + N*RECIPVECS(3,1) + K(2) = M22 + N*RECIPVECS(3,2) + K(3) = M23 + N*RECIPVECS(3,3) + K2 = K(1)*K(1) + K(2)*K(2) + K(3)*K(3) + if (K2.le.KCUTOFF2) then + PREFACTOR = EIGHT*pi*exp(-K2/(4.D0*CALPHA2))/(COULVOL*K2) + PREVIR = (2.D0/K2) + (2.D0/(4.D0*CALPHA2)); + + COSSUM = 0.D0 + SINSUM = 0.D0 + + ! Doing the sin and cos sums + do I = 1,Nr_atoms + DOT = K(1)*RX(I) + K(2)*RY(I) + K(3)*RZ(I) + ! We re-use these in the next loop... + SINLIST(I) = sin(DOT) + COSLIST(I) = cos(DOT) + COSSUM = COSSUM + DELTAQ(I)*COSLIST(I) + SINSUM = SINSUM + DELTAQ(I)*SINLIST(I) + enddo + COSSUM2 = COSSUM*COSSUM + SINSUM2 = SINSUM*SINSUM + + ! Add up energy and force contributions + + KEPREF = KECONST*PREFACTOR + do I = 1,Nr_atoms + COULOMBV(I) = COULOMBV(I) + KEPREF*(COSLIST(I)*COSSUM + SINLIST(I)*SINSUM) + FORCE = KEPREF*DELTAQ(I)*(SINLIST(I)*COSSUM - COSLIST(I)*SINSUM) + FCOUL(1,I) = FCOUL(1,I) + FORCE*K(1) + FCOUL(2,I) = FCOUL(2,I) + FORCE*K(2) + FCOUL(3,I) = FCOUL(3,I) + FORCE*K(3) + enddo + + KEPREF = KEPREF*(COSSUM2 + SINSUM2) + endif + enddo + enddo +enddo + +! Point self energy +CORRFACT = 2.D0*KECONST*CALPHA/SQRTPI; +COULOMBV = COULOMBV - CORRFACT*DELTAQ; + +end subroutine Ewald_k_Space + +end module prg_ewald_mod diff --git a/src/prg_implicit_fermi_mod.F90 b/src/prg_implicit_fermi_mod.F90 index 5c30947c..2e217a98 100644 --- a/src/prg_implicit_fermi_mod.F90 +++ b/src/prg_implicit_fermi_mod.F90 @@ -5,11 +5,13 @@ !! module prg_implicit_fermi_mod + use omp_lib use bml use prg_normalize_mod use prg_densitymatrix_mod use prg_timer_mod use prg_parallel_mod + use prg_ewald_mod implicit none @@ -18,13 +20,164 @@ module prg_implicit_fermi_mod integer, parameter :: dp = kind(1.0d0) public :: prg_implicit_fermi + public :: prg_implicit_fermi_save_inverse public :: prg_implicit_fermi_zero public :: prg_test_density_matrix public :: prg_implicit_fermi_response + public :: prg_implicit_fermi_first_order_response public :: prg_finite_diff contains + !> Recursive Implicit Fermi Dirac for finite temperature. + !! \param Inv_bml Inverses generated by algorithm. + !! \param h_bml Input Hamiltonian matrix. + !! \param p_bml Output density matrix. + !! \param nsteps Number of recursion steps. + !! \param nocc Number of occupied states. + !! \param mu Shifted chemical potential + !! \param beta Input inverse temperature. + !! \param occErrLimit Occupation error limit. + !! \param threshold Threshold for multiplication. + !! \param tol Tolerance for linear system solver. + !! \param SCF_IT The current SCF iteration. + !! \param occiter Counts the total nr of DM calculations during MD. + !! See \cite{niklasson2003} + subroutine prg_implicit_fermi_save_inverse(Inv_bml, h_bml, p_bml, nsteps, nocc, & + mu, beta, occErrLimit, threshold, tol,SCF_IT, occiter) + + implicit none + + type(bml_matrix_t), intent(in) :: h_bml + type(bml_matrix_t), intent(inout) :: p_bml, Inv_bml(nsteps) + integer, intent(in) :: nsteps, SCF_IT + real(dp), intent(in) :: nocc, threshold + real(dp), intent(in) :: tol + real(dp), intent(in) :: occErrLimit, beta + real(dp), intent(inout) :: mu + integer, intent(inout) :: occiter + + type(bml_matrix_t) :: w_bml, y_bml, d_bml, aux_bml, p2_bml, I_bml, ai_bml + real(dp) :: trdPdmu, trP0, occErr, alpha + real(dp) :: cnst, ofactor, mustep + real(dp), allocatable :: trace(:), gbnd(:) + character(20) :: bml_type + integer :: N, M, i, iter, muadj, prev + + bml_type = bml_get_type(h_bml) + N = bml_get_N(h_bml) + M = bml_get_M(h_bml) + + allocate(trace(2)) + call bml_zero_matrix(bml_type, bml_element_real, dp, N, M, p2_bml) + call bml_zero_matrix(bml_type, bml_element_real, dp, N, M, d_bml) + call bml_zero_matrix(bml_type, bml_element_real, dp, N, M, w_bml) + call bml_zero_matrix(bml_type, bml_element_real, dp, N, M, aux_bml) + call bml_zero_matrix(bml_type, bml_element_real, dp, N, M, y_bml) + call bml_identity_matrix(bml_type, bml_element_real, dp, N, M, I_bml) + call bml_identity_matrix(bml_type, bml_element_real, dp, N, M, ai_bml) + + occErr = 10.0_dp + alpha = 1.0_dp + prev = 0 + iter = 0 + cnst = beta/(1.0_dp*2**(nsteps+2)) + + if (SCF_IT .eq. 1) then + ! Normalization + ! P0 = 0.5*I - cnst*(H0-mu0*I) + call bml_copy(h_bml, p_bml) + call prg_normalize_implicit_fermi(p_bml, cnst, mu) + ! Generate good starting guess for (2*(P2-P)+1)^-1 using conjugate gradient + call bml_multiply_x2(p_bml, p2_bml, threshold, trace) + ! Y = 2*(P2-P) + II + call bml_copy(p2_bml, y_bml) + call bml_add(y_bml, p_bml, 1.0_dp, -1.0_dp, threshold) + call bml_scale_add_identity(y_bml, 2.0_dp, 1.0_dp, threshold) + call prg_conjgrad(y_bml, ai_bml, I_bml, aux_bml, d_bml, w_bml, 0.0001_dp, threshold) + else + ! Otherwise use previous inverse as starting guess + call bml_copy(Inv_bml(1),ai_bml) + end if + + do while (occErr .gt. occErrLimit .or. muadj .eq. 1) + iter = iter + 1 + muadj = 0 + write(*,*) 'mu =', mu + ! Normalization + ! P0 = 0.5*I - cnst*(H0-mu0*I) + call bml_copy(h_bml, p_bml) + call prg_normalize_implicit_fermi(p_bml, cnst, mu) + + do i = 1, nsteps + call bml_multiply_x2(p_bml, p2_bml, threshold, trace) + ! Y = 2*(P2-P) + I + call bml_copy(p2_bml, y_bml) + call bml_add(y_bml, p_bml, 1.0_dp, -1.0_dp, threshold) + call bml_scale_add_identity(y_bml, 2.0_dp, 1.0_dp, threshold) + ! Find inverse ai = (2*(P2-P)+I)^-1 + !call prg_conjgrad(y_bml, ai_bml, I_bml, aux_bml, d_bml, w_bml, 0.01_dp, threshold) + call prg_newtonschulz(y_bml, ai_bml, d_bml, w_bml, aux_bml, I_bml, tol, threshold) + call bml_multiply(ai_bml, p2_bml, p_bml, 1.0_dp, 0.0_dp, threshold) + call bml_copy(ai_bml, Inv_bml(i)) ! Save inverses for use in perturbation response calculation + enddo + + trdPdmu = bml_trace(p_bml) + trP0 = trdPdmu + trdPdmu = trdPdmu - bml_sum_squares(p_bml) ! sum p(i,j)**2 + trdPdmu = beta * trdPdmu + occErr = abs(trP0 - nocc) + write(*,*) 'occerr =', nocc-trP0 + + ! If occupation error is too large, do bisection method + if (occerr > 10.0_dp) then + if (nocc-trP0 < 0.0_dp) then + if (prev .eq. 1) then + alpha = alpha/2 + endif + prev = -1 + mu = mu - alpha + else + if (prev .eq. -1) then + alpha = alpha/2 + endif + prev = 1 + mu = mu + alpha + endif + ! Otherwise do Newton + else if (occErr .gt. occErrLimit) then + mustep = (nocc -trP0)/trdPdmu + if (abs(mustep) > 1.0) then + mustep = 0.1_dp*mustep + end if + mu = mu + mustep + muadj = 1 + end if + enddo + ! Adjusting the occupation sometimes causes the perturbation calculation to not converge. + ! For now we recompute the DM one extra time if mu was adjusted. + !if (muadj .eq. 1) then + ! Adjust occupation + ! call bml_copy(p_bml, d_bml) + ! call bml_scale_add_identity(d_bml, -1.0_dp, 1.0_dp, threshold) + ! call bml_multiply(p_bml, d_bml, w_bml, 1.0_dp, 0.0_dp, threshold) + ! ofactor = ((nocc - trP0)/trdPdmu) * beta + ! call bml_add(p_bml, w_bml, 1.0_dp, ofactor, threshold) + !end if + occiter = occiter + iter + call bml_scale(2.0_dp,p_bml) + deallocate(trace) + + call bml_deallocate(p2_bml) + call bml_deallocate(w_bml) + call bml_deallocate(d_bml) + call bml_deallocate(y_bml) + call bml_deallocate(aux_bml) + call bml_deallocate(ai_bml) + call bml_deallocate(I_bml) + + end subroutine prg_implicit_fermi_save_inverse + !> Recursive Implicit Fermi Dirac for finite temperature. !! \param h_bml Input Hamiltonian matrix. !! \param p_bml Output density matrix. @@ -63,6 +216,11 @@ subroutine prg_implicit_fermi(h_bml, p_bml, nsteps, k, nocc, & N = bml_get_N(h_bml) M = bml_get_M(h_bml) + call bml_print_matrix("h_bml",h_bml,1,10,1,10) + call bml_print_matrix("p_bml",p_bml,1,10,1,10) + write(*,*) nsteps, k, nocc, & + mu, beta, method, osteps, occErrLimit, threshold, tol + !stop allocate(trace(2)) call bml_zero_matrix(bml_type, bml_element_real, dp, N, M, p2_bml) call bml_zero_matrix(bml_type, bml_element_real, dp, N, M, d_bml) @@ -106,7 +264,7 @@ subroutine prg_implicit_fermi(h_bml, p_bml, nsteps, k, nocc, & else call prg_setup_linsys(p_bml, y_bml, p2_bml, d_bml, w_bml, aux1_bml, aux2_bml, k, threshold) end if - call prg_conjgrad(y_bml, p_bml, p2_bml, d_bml, w_bml, tol, threshold) + call prg_conjgrad(y_bml, p_bml, p2_bml, d_bml, aux1_bml, w_bml, tol, threshold) enddo else write(*,*) "Doing NS" @@ -123,9 +281,9 @@ subroutine prg_implicit_fermi(h_bml, p_bml, nsteps, k, nocc, & call prg_setup_linsys(p_bml, y_bml, p2_bml, d_bml, w_bml, aux1_bml, aux2_bml, k, threshold) end if if (i .eq. 1) then - call prg_conjgrad(y_bml, ai_bml, I_bml, d_bml, w_bml, 0.9_dp, threshold) + call prg_conjgrad(y_bml, ai_bml, I_bml, aux1_bml, d_bml, w_bml, 0.9_dp, threshold) end if - call prg_newtonschulz(y_bml, ai_bml, d_bml, w_bml, tol, threshold) + call prg_newtonschulz(y_bml, ai_bml, d_bml, w_bml, aux1_bml, I_bml, tol, threshold) call bml_multiply(ai_bml, p2_bml, p_bml, 1.0_dp, 0.0_dp, threshold) enddo @@ -185,7 +343,7 @@ subroutine prg_implicit_fermi_zero(h_bml, p_bml, nsteps, mu, method, threshold, real(dp), intent(in) :: mu, threshold real(dp), intent(inout), optional :: tol - type(bml_matrix_t) :: w_bml, y_bml, d_bml, p2_bml, aux1_bml, aux2_bml, I_bml, ai_bml + type(bml_matrix_t) :: w_bml, y_bml, c_bml, d_bml, p2_bml, aux1_bml, aux2_bml, I_bml, ai_bml real(dp) :: cnst real(dp), allocatable :: trace(:), gbnd(:) character(20) :: bml_type @@ -201,6 +359,7 @@ subroutine prg_implicit_fermi_zero(h_bml, p_bml, nsteps, mu, method, threshold, call bml_zero_matrix(bml_type, bml_element_real, dp, N, M, d_bml) call bml_zero_matrix(bml_type, bml_element_real, dp, N, M, w_bml) call bml_zero_matrix(bml_type, bml_element_real, dp, N, M, y_bml) + call bml_zero_matrix(bml_type, bml_element_real, dp, N, M, c_bml) if (method .eq. 1) then call bml_identity_matrix(bml_type, bml_element_real, dp, N, M, I_bml) call bml_identity_matrix(bml_type, bml_element_real, dp, N, M, ai_bml) @@ -221,7 +380,7 @@ subroutine prg_implicit_fermi_zero(h_bml, p_bml, nsteps, mu, method, threshold, call bml_copy(p2_bml, y_bml) call bml_add(y_bml, p_bml, 1.0_dp, -1.0_dp, threshold) call bml_scale_add_identity(y_bml, 2.0_dp, 1.0_dp, threshold) - call prg_conjgrad(y_bml, p_bml, p2_bml, d_bml, w_bml, tol, threshold) + call prg_conjgrad(y_bml, p_bml, p2_bml, d_bml, w_bml, c_bml, tol, threshold) enddo else write(*,*) "Doing NS" @@ -232,9 +391,9 @@ subroutine prg_implicit_fermi_zero(h_bml, p_bml, nsteps, mu, method, threshold, call bml_add(y_bml, p_bml, 1.0_dp, -1.0_dp, threshold) call bml_scale_add_identity(y_bml, 2.0_dp, 1.0_dp, threshold) if (i .eq. 1) then - call prg_conjgrad(y_bml, ai_bml, I_bml, d_bml, w_bml, 0.9_dp, threshold) + call prg_conjgrad(y_bml, ai_bml, I_bml, c_bml, d_bml, w_bml, 0.9_dp, threshold) end if - call prg_newtonschulz(y_bml, ai_bml, d_bml, w_bml, tol, threshold) + call prg_newtonschulz(y_bml, ai_bml, d_bml, w_bml, c_bml, I_bml, tol, threshold) call bml_multiply(ai_bml, p2_bml, p_bml, 1.0_dp, 0.0_dp, threshold) enddo endif @@ -246,6 +405,7 @@ subroutine prg_implicit_fermi_zero(h_bml, p_bml, nsteps, mu, method, threshold, call bml_deallocate(w_bml) call bml_deallocate(d_bml) call bml_deallocate(y_bml) + call bml_deallocate(c_bml) if (method .eq. 1) then call bml_deallocate(ai_bml) call bml_deallocate(I_bml) @@ -253,6 +413,89 @@ subroutine prg_implicit_fermi_zero(h_bml, p_bml, nsteps, mu, method, threshold, end subroutine prg_implicit_fermi_zero + + !> Calculate first order density matrix response to perturbations using Implicit Fermi Dirac. + !! \param H0_bml Input Hamiltonian matrix. + !! \param H1_bml Input First order perturbation of H0. + !! \param P0_bml Output density matrix. + !! \param P1_bml Output First order density matrix response. + !! \param nsteps Number of recursion steps. + !! \param mu0 Shifted chemical potential. + !! \param beta Input inverse temperature. + !! \param nocc Number of occupied states. + !! \param threshold Threshold for matrix algebra. + !! See \cite{niklasson2015} + subroutine prg_implicit_fermi_first_order_response(H0_bml, H1_bml, P0_bml, P1_bml, & + Inv_bml, nsteps, mu0, beta, nocc, threshold) + + implicit none + + type(bml_matrix_t), intent(in) :: H0_bml, H1_bml, Inv_bml(nsteps) + type(bml_matrix_t), intent(inout) :: P0_bml, P1_bml + real(dp), intent(in) :: mu0, threshold + real(dp) :: mu1 + real(dp), intent(in) :: beta, nocc + integer, intent(in) :: nsteps + type(bml_matrix_t) :: B_bml, C_bml, C0_bml + character(20) :: bml_type + real(dp) :: p1_trace, dPdmu_trace, p1B_trace, mu1B, cnst + integer :: N, M, i, j, k + + bml_type = bml_get_type(H0_bml) + N = bml_get_N(H0_bml) + M = bml_get_M(H0_bml) + + call bml_zero_matrix(bml_type, bml_element_real, dp, N, M, B_bml) + call bml_zero_matrix(bml_type, bml_element_real, dp, N, M, C_bml) + call bml_zero_matrix(bml_type, bml_element_real, dp, N, M, C0_bml) + + cnst = beta/(2**(2+nsteps)) + + ! P0 = 0.5*II - cnst*(H0-mu0*II) + call bml_copy(H0_bml, P0_bml) + call prg_normalize_implicit_fermi(P0_bml, cnst, mu0) + + ! P1 = - cnst*H1 + call bml_copy(H1_bml, P1_bml) + call bml_scale(-1.0_dp*cnst, P1_bml) + do i = 1, nsteps + + ! Calculate coefficient matrices + ! C0 = P0^2 + call bml_multiply(P0_bml, P0_bml, C0_bml, 1.0_dp, 0.0_dp, threshold) + ! C = P0*P1+P1*P0, B = 2(P1 - C) + call bml_multiply(P0_bml, P1_bml, C_bml, 1.0_dp, 0.0_dp, threshold) + call bml_multiply(P1_bml, P0_bml, C_bml, 1.0_dp, 1.0_dp, threshold) + call bml_copy(P1_bml, B_bml) + call bml_add(B_bml, C_bml, 2.0_dp, -2.0_dp, threshold) + ! Get next P0 + call bml_multiply(Inv_bml(i), C0_bml, P0_bml, 1.0_dp, 0.0_dp, threshold) + ! Get next P1 + ! C = P0*P1+P1*P0 + 2(P1 -P0*P1-P1*P0)*P0(i+1) + call bml_multiply(B_bml, P0_bml, C_bml, 1.0_dp, 1.0_dp, threshold) + call bml_multiply(Inv_bml(i), C_bml, P1_bml, 1.0_dp, 0.0_dp, threshold) + enddo + + ! dPdmu = beta*P0(I-P0) + call bml_copy(P0_bml, B_bml) + call bml_scale_add_identity(B_bml, -1.0_dp, 1.0_dp, threshold) + call bml_multiply(P0_bml, B_bml, C_bml, 1.0_dp, 0.0_dp, threshold) + call bml_scale(beta,C_bml) + dPdmu_trace = bml_trace(C_bml) + p1_trace = bml_trace(P1_bml) + mu1 = - p1_trace/dPdmu_trace + if (abs(dPdmu_trace) > 1e-8) then + call bml_add(P1_bml,C_bml,1.0_dp,mu1,threshold) + endif + + call bml_deallocate(B_bml) + call bml_deallocate(C_bml) + call bml_deallocate(C0_bml) + + + end subroutine prg_implicit_fermi_first_order_response + + !> Calculate density matrix response to perturbations using Implicit Fermi Dirac. !! \param H0_bml Input Hamiltonian matrix. !! \param H1_bml, H2_bml, H3_bml Input First to third order perturbations of H0. @@ -270,7 +513,7 @@ end subroutine prg_implicit_fermi_zero !! See \cite{niklasson2015} subroutine prg_implicit_fermi_response(H0_bml, H1_bml, H2_bml, H3_bml, P0_bml, P1_bml, P2_bml, P3_bml, & nsteps, mu0, mu, beta, nocc, occ_tol, lin_tol, order, threshold) - + implicit none type(bml_matrix_t), intent(in) :: H0_bml, H1_bml, H2_bml, H3_bml @@ -279,7 +522,7 @@ subroutine prg_implicit_fermi_response(H0_bml, H1_bml, H2_bml, H3_bml, P0_bml, P real(dp), allocatable, intent(inout) :: mu(:) real(dp), intent(in) :: beta, occ_tol, lin_tol, nocc integer, intent(in) :: nsteps - type(bml_matrix_t) :: I_bml, tmp1_bml, tmp2_bml, C0_bml, T_bml, Ti_bml + type(bml_matrix_t) :: I_bml, tmp1_bml, tmp2_bml, tmp3_bml, C0_bml, T_bml, Ti_bml type(bml_matrix_t), allocatable :: B_bml(:), P_bml(:), C_bml(:), H_bml(:) real(dp), allocatable :: p_trace(:), trace(:) character(20) :: bml_type @@ -305,6 +548,7 @@ subroutine prg_implicit_fermi_response(H0_bml, H1_bml, H2_bml, H3_bml, P0_bml, P end do call bml_zero_matrix(bml_type, bml_element_real, dp, N, M, tmp1_bml) + call bml_zero_matrix(bml_type, bml_element_real, dp, N, M, tmp3_bml) call bml_zero_matrix(bml_type, bml_element_real, dp, N, M, tmp2_bml) call bml_zero_matrix(bml_type, bml_element_real, dp, N, M, C0_bml) call bml_zero_matrix(bml_type, bml_element_real, dp, N, M, T_bml) @@ -371,10 +615,10 @@ subroutine prg_implicit_fermi_response(H0_bml, H1_bml, H2_bml, H3_bml, P0_bml, P call bml_scale_add_identity(T_bml, 2.0_dp, 1.0_dp, threshold) ! Find T-inverse if (i .eq. 1) then - call prg_conjgrad(T_bml, Ti_bml, I_bml, tmp1_bml, tmp2_bml, 0.01_dp, threshold) + call prg_conjgrad(T_bml, Ti_bml, I_bml, tmp1_bml, tmp2_bml, tmp3_bml,0.01_dp, threshold) call bml_identity_matrix(bml_type, bml_element_real, dp, N, M, I_bml) end if - call prg_newtonschulz(T_bml, Ti_bml, tmp1_bml, tmp2_bml, lin_tol, threshold) + call prg_newtonschulz(T_bml, Ti_bml, tmp1_bml, tmp2_bml, tmp3_bml, I_bml, lin_tol, threshold) ! Get next P0 call bml_multiply(Ti_bml, C0_bml, P0_bml, 1.0_dp, 0.0_dp, threshold) ! Get next P1 @@ -608,31 +852,39 @@ end subroutine prg_setup_linsys !! \param tol Convergence criterion (Frobenius norm of residual matrix) !! \param threshold Threshold for matrix algebra - subroutine prg_newtonschulz(a_bml, ai_bml, r_bml, tmp_bml, tol, threshold) + subroutine prg_newtonschulz(a_bml, ai_bml, r_bml, tmp_bml, d_bml, I_bml, tol, threshold) implicit none - type(bml_matrix_t), intent(inout) :: ai_bml, r_bml, tmp_bml - type(bml_matrix_t), intent(in) :: a_bml + type(bml_matrix_t), intent(inout) :: ai_bml, r_bml, tmp_bml, d_bml + type(bml_matrix_t), intent(in) :: a_bml, I_bml real(dp), intent(in) :: threshold, tol - real(dp) :: norm - integer :: i + real(dp) :: err,prev_err,scaled_tol + integer :: i,N,N2 - norm = 1.0 + N = bml_get_N(a_bml) + err = 100000.0 i = 0 - do while(norm > tol) + N2 = N*N + scaled_tol = tol*N + do while(err > scaled_tol) + !write(*,*) 'iter = ', i call bml_copy(ai_bml, tmp_bml) call bml_multiply(a_bml, ai_bml, r_bml, 1.0_dp, 0.0_dp, threshold) call bml_scale_add_identity(r_bml, -1.0_dp, 1.0_dp, threshold) - norm = bml_fnorm(r_bml) - ! write(*,*) "norm = ", norm - if (norm < tol) then - exit - end if - call bml_multiply(tmp_bml, r_bml, ai_bml, 1.0_dp, 1.0_dp, threshold) + prev_err = err + err = bml_fnorm(r_bml) + !write(*,*) "err = ", err + !write(*,*) "prev_err = ", prev_err + if (10*prev_err < err) then + write(*,*) 'NS did not converge, calling conjugate gradient' + call prg_conjgrad(a_bml, ai_bml, I_bml, r_bml, tmp_bml, d_bml, 0.0001_dp, threshold) + else + call bml_multiply(tmp_bml, r_bml, ai_bml, 1.0_dp, 1.0_dp, threshold) + endif i = i + 1 enddo - ! write(*,*) "Number of NS iterations:", i + !write(*,*) "Number of NS iterations:", i end subroutine prg_newtonschulz ! Preconditioned CG, preconditioner inverse diagonal of A @@ -717,42 +969,42 @@ end subroutine prg_pcg !! \param w_bml Auxillary matrix !! \param cg_tol Convergence condition (OBS squared Frobenius norm of residual matrix) !! \param threshold Threshold for matrix algebra - subroutine prg_conjgrad(A_bml, p_bml, p2_bml, d_bml, w_bml, cg_tol, threshold) + subroutine prg_conjgrad(A_bml, p_bml, p2_bml, tmp_bml, d_bml, w_bml, cg_tol, threshold) implicit none - type(bml_matrix_t), intent(in) :: A_bml - type(bml_matrix_t), intent(inout) :: p_bml, p2_bml, d_bml, w_bml + type(bml_matrix_t), intent(in) :: A_bml, p2_bml + type(bml_matrix_t), intent(inout) :: p_bml, tmp_bml, d_bml, w_bml real(dp), intent(in) :: cg_tol, threshold real(dp) :: alpha, beta integer :: k real(dp) :: r_norm_old, r_norm_new - call bml_multiply(A_bml, p_bml, p2_bml, -1.0_dp, 1.0_dp, threshold) - r_norm_new = bml_sum_squares(p2_bml) + call bml_copy(p2_bml,tmp_bml) + call bml_multiply(A_bml, p_bml, tmp_bml, -1.0_dp, 1.0_dp, threshold) + r_norm_new = bml_sum_squares(tmp_bml) k = 0 do while (r_norm_new .gt. cg_tol) - ! write(*,*) r_norm_new + write(*,*) r_norm_new k = k + 1 if (k .eq. 1) then - write(*,*) r_norm_new - call bml_copy(p2_bml, d_bml) + call bml_copy(tmp_bml, d_bml) else beta = r_norm_new/r_norm_old - call bml_add(d_bml, p2_bml, beta, 1.0_dp, threshold) + call bml_add(d_bml, tmp_bml, beta, 1.0_dp, threshold) endif call bml_multiply(A_bml, d_bml, w_bml, 1.0_dp, 0.0_dp, threshold) alpha = r_norm_new/bml_trace_mult(d_bml, w_bml) call bml_add(p_bml, d_bml, 1.0_dp, alpha, threshold) - call bml_add(p2_bml, w_bml, 1.0_dp, -alpha, threshold) + call bml_add(tmp_bml, w_bml, 1.0_dp, -alpha, threshold) r_norm_old = r_norm_new - r_norm_new = bml_sum_squares(p2_bml) - if (k .gt. 50) then + r_norm_new = bml_sum_squares(tmp_bml) + if (k .gt. 500) then write(*,*) "Conjugate gradient is not converging" stop endif diff --git a/src/prg_xlbokernel_mod.F90 b/src/prg_xlbokernel_mod.F90 new file mode 100644 index 00000000..54f4ecf1 --- /dev/null +++ b/src/prg_xlbokernel_mod.F90 @@ -0,0 +1,837 @@ +!> Pre-conditioned O(N) calculation of the kernel for XL-BOMD. +!! \ingroup PROGRESS +!! \brief Here are subroutines implementing Niklasson's scheme for +!! low-rank, Krylov subspace approximation of the kernel. +module prg_xlbokernel_mod + + use omp_lib + use bml + use prg_normalize_mod + use prg_densitymatrix_mod + use prg_timer_mod + use prg_parallel_mod + use prg_ewald_mod + use prg_implicit_fermi_mod + + implicit none + + private !Everything is private by default + + integer, parameter :: dp = kind(1.0d0) + + public :: prg_kernel_multirank + public :: prg_kernel_multirank_latte + public :: prg_kernel_matrix_multirank + public :: prg_full_kernel + public :: prg_full_kernel_latte + +contains + +subroutine Invert(A,AI,N) + +implicit none +integer, parameter :: PREC = 8 +integer, intent(in) :: N +real(PREC), intent(in) :: A(N,N) +real(PREC), intent(out) :: AI(N,N) +real(PREC) :: WORK(N+N*N)!, C(N,N) +integer :: LDA, LWORK, M, INFO, IPIV(N) +integer :: I,J,K + +external DGETRF +external DGETRI + +AI = A +LDA = N +M = N +LWORK = N+N*N + +call DGETRF(M, N, AI, LDA, IPIV, INFO) +call DGETRI(N, AI, N, IPIV, WORK, LWORK, INFO) + +end subroutine Invert + +!> Compute low rank approximation of (K0*J)^(-1)*K0*(q[n]-n)(for LATTE) +!! \param KRes The low rank approximation +!! \param KK0_bml The pre-conditioner K0. +!! \param Res The residual q[n]-n +!! \param FelTol Relative error tolerance for approximation +!! \param L Number of vectors used. +!! \param LMAX Maximum nr of vectors to use. +!! \param NUMRANK Nr of vectors to use. +!! \param HO_bml, Orthogonalized Hamiltonian matrix. +!! \param mu The chemical potiential. +!! \param beta Scaled inverse temperature. +!! \param RXYZ Nuclear coordinates. +!! \param Box Box dimensions. +!! \param Hubbard_U Hubbard U list. +!! \param Element_Pointer List to keep track of elements. +!! \param Nr_atoms The number of atoms. +!! \param HDIM Hamiltonian matrix dimension. +!! \param Max_Nr_Neigh Max neighbours for Ewald. +!! \param Coulomb_acc Coulomb accuracy. +!! \param nebcoul Neighbour lists. +!! \param totnebcoul Number of neighbours list. +!! \param Hinxlist List to keep track of atomic positions in the Hamiltonian. +!! \param S_bml The S matrix. +!! \param Z_bml, The Z matrix. +!! \param Nocc Occupation. +!! \param Inv_bml, Inverses generated by prg_implicit_fermi_save_inverse. +!! \param DO_bml, D1_bml, H1_bml, Y_bml, X_bml Auxillary matrices. +!! \param m_rec Number of recursion steps. +!! \param threshold Threshold value for matrix truncation. +!! \param Nr_elem Number of elements in Hubbard list. +subroutine prg_kernel_multirank_latte(KRes,KK0_bml,Res,FelTol,L,LMAX,NUMRANK,HO_bml,mu,beta,RXYZ,Box,Hubbard_U,Element_Pointer, & + Nr_atoms,HDIM,Max_Nr_Neigh,Coulomb_acc,nebcoul,totnebcoul,Hinxlist, & + S_bml,Z_bml,Nocc,Inv_bml,H1_bml,X_bml,Y_bml,DO_bml,D1_bml,m_rec,threshold,Nr_elem) + +!! Res = q[n] - n +!! KK0 is preconditioner +!! KRes rank-L approximation of (K0*J)^(-1)*K0*(q[n]-n) with (K0*J)^(-1) as in Eq. (41) in Ref. [*] + + implicit none + integer, intent(in) :: Nr_atoms, HDIM, Nocc,Max_Nr_Neigh,m_rec,Nr_elem,NUMRANK + real(dp), parameter :: ONE = 1.D0, TWO = 2.D0, ZERO = 0.D0 + real(dp), intent(in) :: Coulomb_acc, FelTol, beta, mu + real(dp), intent(in) :: RXYZ(3,Nr_atoms), Box(3,3) + real(dp), intent(in) :: Res(Nr_atoms) + integer, intent(in) :: Hinxlist(HDIM),Element_Pointer(Nr_atoms) + real(dp), intent(in) :: Hubbard_U(Nr_elem) + type(bml_matrix_t), intent(inout) :: HO_bml, S_bml, Z_bml, Inv_bml(m_rec), KK0_bml + real(dp) :: K0Res(Nr_atoms) + type(bml_matrix_t),intent(inout) :: H1_bml, X_bml,Y_bml,DO_bml,D1_bml + real(dp), intent(in) :: threshold + integer, intent(in) :: LMAX + character(20) :: bml_type + integer, intent(in) :: totnebcoul(Nr_atoms), nebcoul(4,Max_Nr_Neigh,Nr_atoms) + real(dp) :: Coulomb_Pot_Real(Nr_atoms), Coulomb_Pot(Nr_atoms) + real(dp) :: Coulomb_Pot_Real_I, Coulomb_Pot_k(Nr_atoms),dq_dv(Nr_atoms) + real(dp) :: Coulomb_Pot_Real_dq_v(Nr_atoms), Coulomb_Pot_dq_v(Nr_atoms) + real(dp) :: dq_v(Nr_atoms) + real(dp), intent(inout) :: KRes(Nr_atoms) + integer :: I,J,K,It,N,MN + integer, intent(out) :: L + real(dp) :: Fel, proj_tmp, start, finish + real(dp) :: vi(Nr_atoms,LMAX), fi(Nr_atoms,LMAX), v(Nr_atoms) + real(dp) :: dr(Nr_Atoms), IdentRes(Nr_atoms) + real(dp), allocatable :: O(:,:),M(:,:),row1(:),row2(:),row_NA(:) + type(bml_matrix_t) :: KK0T_bml, K0Res_bml, Res_bml, ZT_bml, T_bml + + call timer_prg_init() + bml_type = bml_get_type(HO_bml) + N = bml_get_N(HO_bml) + MN = bml_get_M(HO_bml) + allocate(row1(HDIM)); allocate(row2(HDIM)); allocate(row_NA(Nr_atoms)) + call bml_zero_matrix('csr',bml_element_real,dp,Nr_atoms,Nr_atoms,KK0T_bml) + call bml_zero_matrix('csr',bml_element_real,dp,Nr_atoms,Nr_atoms,K0Res_bml) + call bml_zero_matrix('csr',bml_element_real,dp,Nr_atoms,Nr_atoms,Res_bml) + call bml_zero_matrix(bml_type,bml_element_real,dp,N,MN,ZT_bml) + + call bml_transpose(Z_bml,ZT_bml) + + ! K0Res = KK0*Res temporary for matrix-vector multiplication + call bml_set_row(Res_bml,1,Res,threshold) + call bml_transpose(KK0_bml,KK0T_bml) + call bml_multiply(Res_bml,KK0T_bml,K0Res_bml,1.0_dp,0.0_dp,threshold) + call bml_get_row(K0Res_bml,1,row_NA) + K0Res = row_NA + dr = K0Res + + I = 0 + Fel = 1.D0 + do while ((Fel > FelTol).AND.(I < (LMAX)).AND.(I < NUMRANK)) !! Fel = "Error" in Swedish + I = I + 1 + vi(:,I) = dr/norm2(dr) + do J = 1,I-1 + vi(:,I) = vi(:,I) - dot_product(vi(:,I),vi(:,J))*vi(:,J) !! Orthogonalized v_i as in Eq. (42) Ref. [*] + enddo + vi(:,I) = vi(:,I)/norm2(vi(:,I)) + v(:) = vi(:,I) ! v_i + + ! Compute H1 = H(v) + dq_v = v + call prg_timer_start(1) + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(J,Coulomb_Pot_Real_I) + do J = 1,Nr_atoms + call Ewald_Real_Space_latte(Coulomb_Pot_Real_I,J,RXYZ,Box, & + dq_v,Hubbard_U,Element_Pointer,Nr_atoms,Coulomb_acc,nebcoul,totnebcoul,HDIM,Max_Nr_Neigh,Nr_elem) + Coulomb_Pot_Real(J) = Coulomb_Pot_Real_I + enddo + !$OMP END PARALLEL DO + + call Ewald_k_Space_latte(Coulomb_Pot_k,RXYZ,Box,dq_v,Nr_atoms,Coulomb_acc,Max_Nr_Neigh) + Coulomb_Pot_dq_v = Coulomb_Pot_Real+Coulomb_Pot_k + call prg_timer_stop(1,1) + + + call bml_deallocate(H1_bml) + call bml_zero_matrix(bml_type,bml_element_real,dp,HDIM,MN,H1_bml) + + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(It,K) + do It = 1,Nr_atoms-1 + do K = Hinxlist(It)+1,Hinxlist(It+1) + row1(K) = Hubbard_U(Element_Pointer(It))*dq_v(It) + Coulomb_Pot_dq_v(It) + enddo + enddo + !$OMP END PARALLEL DO + do K = Hinxlist(Nr_atoms)+1,HDIM + row1(K) = Hubbard_U(Element_Pointer(Nr_atoms))*dq_v(Nr_atoms) + Coulomb_Pot_dq_v(Nr_atoms) + enddo + + ! H1 = 1/2(S*H1+H1*S) + call bml_set_diagonal(H1_bml,row1,threshold) + call bml_multiply(S_bml,H1_bml,X_bml,1.0_dp,0.0_dp,threshold) + call bml_multiply(H1_bml,S_bml,X_bml,0.5_dp,0.5_dp,threshold) + call bml_copy(X_bml,H1_bml) + + ! H1 = Z^T H1 Z + call bml_multiply(ZT_bml,H1_bml,X_bml,1.0_dp,0.0_dp,threshold) + call bml_multiply(X_bml,Z_bml,H1_bml,1.0_dp,0.0_dp,threshold) + + ! Compute D1 = F_FD(HO_bml + eps*H1_bml)/eps at eps = 0 + call prg_implicit_fermi_first_order_response(HO_bml,H1_bml,DO_bml,D1_bml,Inv_bml, & + m_rec, mu, beta, real(nocc,dp), threshold) + + ! D1 = Z D1 Z^T + call bml_multiply(Z_bml,D1_bml,Y_bml,2.0_dp,0.0_dp,threshold) + call bml_multiply(Y_bml,ZT_bml,D1_bml,1.0_dp,0.0_dp,threshold) + + ! Compute dq/dv + call bml_multiply(D1_bml,S_bml,X_bml, 1.0_dp,0.0_dp,threshold) + call bml_get_diagonal(X_bml,row1) + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(It,K) + do It = 1, Nr_atoms-1 + dq_dv(It) = 0 + do K = Hinxlist(It)+1,Hinxlist(It+1) + dq_dv(It) = dq_dv(It) + row1(K) + enddo + enddo + !$OMP END PARALLEL DO + dq_dv(Nr_atoms) = 0 + do K = Hinxlist(Nr_atoms)+1,HDIM + dq_dv(Nr_atoms) = dq_dv(Nr_atoms) + row1(K) + enddo + + dr = dq_dv - v + ! fi = K0(dq_dv - v) + call bml_set_row(Res_bml,1,dr,threshold) + call bml_multiply(Res_bml,KK0T_bml,K0Res_bml,1.0_dp,0.0_dp,threshold) + call bml_get_row(K0Res_bml,1,row_NA) + dr = row_NA + fi(:,I) = dr + + L = I + allocate(O(L,L), M(L,L)) + do K = 1,L + do J = 1,L + O(K,J) = dot_product(fi(:,K),fi(:,J)) ! O_KJ = < fv_i(K) | fv_i(J) > see below Eq. (31) + enddo + enddo + call Invert(O,M,L) ! M = O^(-1) + IdentRes = 0.D0*K0Res + KRes = 0.D0 + do K = 1,L + do J = 1,L + proj_tmp = M(K,J)*dot_product(fi(:,J),K0Res) + IdentRes = IdentRes + proj_tmp*fi(:,K) + KRes = KRes + proj_tmp*vi(:,K) !! KRes becomes the rank-L approximate of (K0*J)^(-1)*K0*(q[n]-n) + enddo + enddo + Fel = norm2(IdentRes-K0Res)/norm2(IdentRes) !! RELATIVE RESIDUAL ERROR ESTIMATE Eq. (48) Ref. [*] + write(*,*) '# I, L, Fel = ',I,L,Fel !! Fel goes down with L + deallocate(O, M) + + enddo + + deallocate(row1);deallocate(row2);deallocate(row_NA) + call bml_deallocate(KK0T_bml) + call bml_deallocate(K0Res_bml) + call bml_deallocate(Res_bml) + call bml_deallocate(ZT_bml) + call prg_timer_shutdown() + + end subroutine prg_kernel_multirank_latte + + ! Above routine but for development code + subroutine prg_kernel_multirank(KRes,KK0_bml,Res,FelTol,L,LMAX,HO_bml,mu,beta,RX,RY,RZ,LBox,Hubbard_U,Element_Type, & + Nr_atoms,HDIM,Max_Nr_Neigh,Coulomb_acc,TIMERATIO,nnRx,nnRy,nnRz,nrnnlist,nnType,H_INDEX_START,H_INDEX_END, & + S_bml,Z_bml,Nocc,Znuc,Inv_bml,H1_bml,X_bml,Y_bml,DO_bml,D1_bml,m_rec,threshold) + +!! Res = q[n] - n +!! KK0 is preconditioner +!! KRes rank-L approximation of (K0*J)^(-1)*K0*(q[n]-n) with (K0*J)^(-1) as in Eq. (41) in Ref. [*] + + implicit none + integer, intent(in) :: Nr_atoms, HDIM, Nocc,Max_Nr_Neigh,m_rec + real(dp), parameter :: ONE = 1.D0, TWO = 2.D0, ZERO = 0.D0 + real(dp), intent(in) :: Coulomb_acc, TIMERATIO, FelTol, beta, mu + real(dp), intent(in) :: RX(Nr_atoms), RY(Nr_atoms), RZ(Nr_atoms), LBox(3) + real(dp), intent(in) :: Res(Nr_atoms) + integer, intent(in) :: H_INDEX_START(Nr_atoms), H_INDEX_END(Nr_atoms) + real(dp), intent(in) :: Znuc(Nr_atoms), Hubbard_U(Nr_atoms) + type(bml_matrix_t), intent(in) :: HO_bml, S_bml, Z_bml, Inv_bml(m_rec), KK0_bml + real(dp) :: K0Res(Nr_atoms) + type(bml_matrix_t),intent(inout) :: H1_bml, X_bml,Y_bml,DO_bml,D1_bml + real(dp), intent(in) :: threshold + integer, intent(in) :: LMAX + character(10), intent(in) :: Element_Type(Nr_atoms) + integer, intent(in) :: nrnnlist(Nr_atoms), nnType(Nr_atoms,Max_Nr_Neigh) + real(dp), intent(in) :: nnRx(Nr_atoms,Max_Nr_Neigh) + real(dp), intent(in) :: nnRy(Nr_atoms,Max_Nr_Neigh), nnRz(Nr_atoms,Max_Nr_Neigh) + real(dp) :: Coulomb_Pot_Real(Nr_atoms), Coulomb_Pot(Nr_atoms) + real(dp) :: Coulomb_Pot_Real_I, Coulomb_Pot_k(Nr_atoms),dq_dv(Nr_atoms) + real(dp) :: Coulomb_Pot_Real_dq_v(Nr_atoms), Coulomb_Pot_dq_v(Nr_atoms) + real(dp) :: Coulomb_Force_Real_I(3),Coulomb_Force_k(3,Nr_atoms) + real(dp) :: dq_v(Nr_atoms) + real(dp), intent(inout) :: KRes(Nr_atoms) + integer :: I,J,K,It + integer, intent(out) :: L + real(dp) :: Fel, proj_tmp + real(dp) :: vi(Nr_atoms,LMAX), fi(Nr_atoms,LMAX), v(Nr_atoms) + real(dp) :: dr(Nr_Atoms), IdentRes(Nr_atoms) + real(dp), allocatable :: O(:,:),M(:,:),row1(:),row2(:),row_NA(:) + type(bml_matrix_t) :: KK0T_bml, K0Res_bml, Res_bml + + allocate(row1(HDIM)); allocate(row2(HDIM)); allocate(row_NA(Nr_atoms)) + call bml_zero_matrix("ellpack",bml_element_real,dp,Nr_atoms,Nr_atoms,KK0T_bml) + call bml_zero_matrix("ellpack",bml_element_real,dp,Nr_atoms,Nr_atoms,K0Res_bml) + call bml_zero_matrix("ellpack",bml_element_real,dp,Nr_atoms,Nr_atoms,Res_bml) + + ! K0Res = KK0*Res temporary for matrix-vector multiplication + call bml_transpose(KK0_bml,KK0T_bml) + call bml_set_row(Res_bml,1,Res,ONE*1e-14) + call bml_multiply(Res_bml,KK0T_bml,K0Res_bml,1.0_dp,0.0_dp,ONE*1e-14) + call bml_get_row(K0Res_bml,1,row_NA) + K0Res = row_NA + dr = K0Res + + I = 0 + Fel = 1.D0 + do while ((Fel > FelTol).AND.(I < (LMAX))) !! Fel = "Error" in Swedish + I = I + 1 + vi(:,I) = dr/norm2(dr) + do J = 1,I-1 + vi(:,I) = vi(:,I) - dot_product(vi(:,I),vi(:,J))*vi(:,J) !! Orthogonalized v_i as in Eq. (42) Ref. [*] + enddo + vi(:,I) = vi(:,I)/norm2(vi(:,I)) + v(:) = vi(:,I) ! v_i + + dq_v = v + + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(J,Coulomb_Pot_Real_I,Coulomb_Force_Real_I) + do J = 1,Nr_atoms + call Ewald_Real_Space(Coulomb_Pot_Real_I,Coulomb_Force_Real_I,J,RX,RY,RZ,LBox, & + dq_v,Hubbard_U,Element_Type,Nr_atoms,Coulomb_acc,TIMERATIO,nnRx,nnRy,nnRz,nrnnlist,nnType,HDIM,Max_Nr_Neigh) + Coulomb_Pot_Real(J) = Coulomb_Pot_Real_I + enddo + !$OMP END PARALLEL DO + + call Ewald_k_Space(Coulomb_Pot_k,Coulomb_Force_k,RX,RY,RZ,LBox,dq_v,Nr_atoms,Coulomb_acc,TIMERATIO,Max_Nr_Neigh) + Coulomb_Pot_dq_v = Coulomb_Pot_Real+Coulomb_Pot_k + + row1 = 0.0_dp + do J = 1,HDIM + call bml_set_row(H1_bml,J,row1,threshold) + enddo + + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(J,K) + do J = 1,Nr_atoms + do K = H_INDEX_START(J),H_INDEX_END(J) + row1(K) = Hubbard_U(J)*dq_v(J) + Coulomb_Pot_dq_v(J) + enddo + enddo + !$OMP END PARALLEL DO + + call bml_set_diagonal(H1_bml,row1,threshold) + call bml_multiply(S_bml,H1_bml,D1_bml,1.0_dp,0.0_dp,threshold) + call bml_multiply(H1_bml,S_bml,D1_bml,0.5_dp,0.5_dp,threshold) + call bml_copy(D1_bml,H1_bml) + + call bml_transpose(Z_bml,X_bml) + call bml_multiply(X_bml,H1_bml,D1_bml,1.0_dp,0.0_dp,threshold) + call bml_multiply(D1_bml,Z_bml,H1_bml,1.0_dp,0.0_dp,threshold) + + call prg_implicit_fermi_first_order_response(HO_bml,H1_bml,DO_bml,D1_bml,Inv_bml, & + m_rec, mu, beta, real(nocc,dp), threshold) + + call bml_transpose(Z_bml,X_bml) + call bml_multiply(Z_bml,D1_bml,Y_bml,2.0_dp,0.0_dp,threshold) + call bml_multiply(Y_bml,X_bml,D1_bml,1.0_dp,0.0_dp,threshold) + + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(It,K,row1,row2) + do It = 1, Nr_atoms + dq_dv(It) = 0 + do K = H_INDEX_START(It), H_INDEX_END(It) + call bml_get_row(S_bml,K,row1) + call bml_get_row(D1_bml,K,row2) + dq_dv(It) = dq_dv(It) + dot_product(row1,row2) + enddo + enddo + !$OMP END PARALLEL DO + + dr = dq_dv - v + ! fi = K0(dq_dv - v) + call bml_set_row(Res_bml,1,dr,threshold) + call bml_multiply(Res_bml,KK0T_bml,K0Res_bml,1.0_dp,0.0_dp,threshold) + call bml_get_row(K0Res_bml,1,row_NA) + dr = row_NA + fi(:,I) = dr + + L = I + allocate(O(L,L), M(L,L)) + do K = 1,L + do J = 1,L + O(K,J) = dot_product(fi(:,K),fi(:,J)) ! O_KJ = < fv_i(K) | fv_i(J) > see below Eq. (31) + enddo + enddo + call Invert(O,M,L) ! M = O^(-1) + IdentRes = 0.D0*K0Res + KRes = 0.D0 + do K = 1,L + do J = 1,L + proj_tmp = M(K,J)*dot_product(fi(:,J),K0Res) + IdentRes = IdentRes + proj_tmp*fi(:,K) + KRes = KRes + proj_tmp*vi(:,K) !! KRes becomes the rank-L approximate of (K0*J)^(-1)*K0*(q[n]-n) + enddo + enddo + Fel = norm2(IdentRes-K0Res)/norm2(IdentRes) !! RELATIVE RESIDUAL ERROR ESTIMATE Eq. (48) Ref. [*] + write(*,*) '# I, L, Fel = ',I,L,Fel !! Fel goes down with L + deallocate(O, M) + + enddo + + deallocate(row1);deallocate(row2);deallocate(row_NA) + call bml_deallocate(KK0T_bml) + call bml_deallocate(K0Res_bml) + call bml_deallocate(Res_bml) + + end subroutine prg_kernel_multirank + +!> Compute full inverse Jacobian of q[n]-n (for LATTE) +!! \param KK The inverse Jacobian. +!! \param DO_bml Orthogonalized density matrix. +!! \param mu0 The chemical potiential. +!! \param RXYZ Nuclear coordinates. +!! \param Box Box dimensions. +!! \param Hubbard_U Hubbard U list. +!! \param Element_Pointer List to keep track of elements. +!! \param Nr_atoms The number of atoms. +!! \param HDIM Hamiltonian matrix dimension. +!! \param Max_Nr_Neigh Max neighbours for Ewald. +!! \param Coulomb_acc Coulomb accuracy +!! \param Hinxlist List to keep track of atomic positions in the Hamiltonian. +!! \param S_bml The S matrix. +!! \param Z_bml, The Z matrix. +!! \param Inv_bml, Inverses generated by prg_implicit_fermi_save_inverse. +!! \param HO_bml, Orthogonalized Hamiltonian matrix. +!! \param D1_bml, H1_bml, Y_bml, X_bml Auxillary matrices. +!! \param Nocc Occupation. +!! \param m_rec Number of recursion steps. +!! \param threshold Threshold value for matrix truncation. +!! \param beta Scaled inverse temperature. +!! \param Nr_elem Number of elements in Hubbard list. +subroutine prg_full_kernel_latte(KK,DO_bml,mu0,RXYZ,Box,Hubbard_U, & +Element_Pointer, Nr_atoms,HDIM, Max_Nr_Neigh,Coulomb_acc, & +Hinxlist,S_bml,Z_bml,Inv_bml,D1_bml,H1_bml,HO_bml,Y_bml,X_bml, & +Nocc,m_rec,threshold,beta,Nr_elem) + +use bml + +implicit none +integer, parameter :: PREC = 8, dp = kind(1.0d0) +integer, intent(in) :: Nr_atoms, Nr_elem,HDIM, Nocc, Max_Nr_Neigh,m_rec +real(PREC), parameter :: ONE = 1.D0, TWO = 2.D0, ZERO = 0.D0 +real(PREC), parameter :: kB = 8.61739d-5 ! eV/K, kB = 6.33366256e-6 Ry/K, kB = 3.166811429e-6 Ha/K +real(PREC), intent(in) :: Coulomb_acc, threshold,beta +real(PREC) :: v(Nr_atoms) +real(PREC), intent(in) :: RXYZ(3,Nr_atoms),Box(3,3) +integer, intent(in) :: Hinxlist(HDIM) +real(PREC), intent(in) :: Hubbard_U(Nr_elem) +type(bml_matrix_t), intent(in) :: Inv_bml(m_rec) +type(bml_matrix_t), intent(in) :: S_bml,Z_bml,HO_bml +type(bml_matrix_t),intent(inout) :: H1_bml,DO_bml,D1_bml,Y_bml,X_bml +real(PREC), intent(inout) :: mu0 +integer, intent(in) :: Element_Pointer(Nr_atoms) +real(PREC) :: Coulomb_Pot_Real(Nr_atoms), Coulomb_Pot(Nr_atoms) +real(PREC) :: Coulomb_Pot_Real_I, Coulomb_Pot_k(Nr_atoms) +real(PREC) :: Coulomb_Pot_Real_dq_v(Nr_atoms), Coulomb_Pot_dq_v(Nr_atoms) +real(PREC) :: dq_v(Nr_atoms) +real(PREC) :: dq_dv(Nr_atoms), err,tol +real(PREC), intent(out) :: KK(Nr_atoms,Nr_atoms) +integer :: I,J,K, ITER, mm,It,N,MN +real(PREC),allocatable :: row1(:),row2(:),JJ(:,:) +type(bml_matrix_t) :: ZT_bml, R_bml, JJI_bml, JJ_bml, tmp_bml +character(20) :: bml_type + + bml_type = bml_get_type(HO_bml) + N = bml_get_N(HO_bml) + MN = bml_get_M(HO_bml) + call bml_zero_matrix(bml_type,bml_element_real,dp,N,MN,ZT_bml) + call bml_transpose(Z_bml,ZT_bml) + allocate(row1(HDIM)); allocate(row2(HDIM)); allocate(JJ(Nr_atoms,Nr_atoms)) + Coulomb_Pot_dq_v = ZERO + Coulomb_Pot_k = ZERO + dq_v = ZERO + JJ = ZERO + KK = ZERO + + do J = 1,Nr_atoms + dq_v(J) = ONE + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(I,Coulomb_Pot_Real_I) + do I = 1,Nr_atoms + call Ewald_Real_Space_Single_latte(Coulomb_Pot_Real_I,I,RXYZ,Box,Nr_elem, & + dq_v,J,Hubbard_U,Element_Pointer,Nr_atoms,Coulomb_acc,HDIM,Max_Nr_Neigh) + Coulomb_Pot_Real(I) = Coulomb_Pot_Real_I + enddo + !$OMP END PARALLEL DO + call Ewald_k_Space_latte(Coulomb_Pot_k,RXYZ,Box,dq_v,Nr_atoms,Coulomb_acc, & + Max_Nr_Neigh) + Coulomb_Pot_dq_v = Coulomb_Pot_Real+Coulomb_Pot_k + + call bml_deallocate(H1_bml) + call bml_zero_matrix(bml_type,bml_element_real,dp,N,MN,H1_bml) + + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(I,K) + do I = 1,Nr_atoms-1 + do K = Hinxlist(I)+1,Hinxlist(I+1) + row1(K) = Hubbard_U(Element_Pointer(I))*dq_v(I) + Coulomb_Pot_dq_v(I) + enddo + enddo + !$OMP END PARALLEL DO + do K = Hinxlist(Nr_atoms)+1,HDIM + row1(K) = Hubbard_U(Element_Pointer(Nr_atoms))*dq_v(Nr_atoms) + Coulomb_Pot_dq_v(Nr_atoms) + enddo + + call bml_set_diagonal(H1_bml,row1,threshold) + call bml_multiply(S_bml,H1_bml,X_bml,1.0_dp,0.0_dp,threshold) + call bml_multiply(H1_bml,S_bml,X_bml,0.5_dp,0.5_dp,threshold) + call bml_copy(X_bml,H1_bml) + + call bml_multiply(ZT_bml,H1_bml,X_bml,1.0_dp,0.0_dp,threshold) + call bml_multiply(X_bml,Z_bml,H1_bml,1.0_dp,0.0_dp,threshold) + + call prg_implicit_fermi_first_order_response(HO_bml,H1_bml,DO_bml,D1_bml,Inv_bml, & + m_rec,mu0,beta,real(nocc,PREC),threshold) + + call bml_multiply(Z_bml,D1_bml,Y_bml,2.0_dp,0.0_dp,threshold) + call bml_multiply(Y_bml,ZT_bml,D1_bml,1.0_dp,0.0_dp,threshold) + + call bml_multiply(D1_bml,S_bml,X_bml,1.0_dp,0.0_dp,threshold) + call bml_get_diagonal(X_bml,row1) + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(It,K) + do It = 1, Nr_atoms-1 + dq_dv(It) = 0 + do K = Hinxlist(It)+1,Hinxlist(It+1) + dq_dv(It) = dq_dv(It) + row1(K) + enddo + JJ(It,J) = dq_dv(It) + enddo + !$OMP END PARALLEL DO + dq_dv(Nr_atoms) = 0 + do K = Hinxlist(Nr_atoms)+1,HDIM + dq_dv(Nr_atoms) = dq_dv(Nr_atoms) + row1(K) + enddo + JJ(Nr_atoms,J) = dq_dv(Nr_atoms) + dq_v = ZERO + enddo + + do I = 1,Nr_atoms + JJ(I,I) = JJ(I,I) - ONE + enddo + + call Invert(JJ,KK,Nr_atoms) + + deallocate(row1); deallocate(row2); deallocate(JJ) + call bml_deallocate(ZT_bml) + +end subroutine prg_full_kernel_latte + +!> Compute full inverse Jacobian of q[n]-n (for development code) +!! \param KK The inverse Jacobian. +!! \param DO_bml Orthogonalized density matrix. +!! \param mu0 The chemical potiential. +!! \param RX,RY,RZ Nuclear coordinates. +!! \param Lbox Box dimensions. +!! \param Hubbard_U Hubbard U list. +!! \param Element_Type List to keep track of elements. +!! \param Nr_atoms The number of atoms. +!! \param HDIM Hamiltonian matrix dimension. +!! \param Max_Nr_Neigh Max neighbours for Ewald. +!! \param Coulomb_acc Coulomb accuracy +!! \param TIMERATIO Parameter for Ewald +!! \param nnRx,nnRy,nnRz Neighbour lists. +!! \param nrnnlist Number of neighbours list. +!! \param nnType Refers to original order of atoms. +!! \param H_INDEX_START, H_INDEX_END Lists to keep track of atomic positions in the Hamiltonian. +!! \param S_bml The S matrix. +!! \param Z_bml, The Z matrix. +!! \param Inv_bml, Inverses generated by prg_implicit_fermi_save_inverse. +!! \param HO_bml, Orthogonalized Hamiltonian matrix. +!! \param D1_bml, H1_bml, Y_bml, X_bml Auxillary matrices. +!! \param Nocc Occupation. +!! \param Znuc List of nuclear charges. +!! \param m_rec Number of recursion steps. +!! \param threshold Threshold value for matrix truncation. +!! \param beta Scaled inverse temperature. +!! \param diagonal Auxillary vector. + +subroutine prg_full_kernel(KK,DO_bml,mu0,RX,RY,RZ,LBox,Hubbard_U,Element_Type, & +Nr_atoms,HDIM, Max_Nr_Neigh,Coulomb_acc,TIMERATIO,nnRx,nnRy,nnRz, & +nrnnlist,nnType,H_INDEX_START,H_INDEX_END,S_bml,Z_bml,Inv_bml,D1_bml,H1_bml,HO_bml,Y_bml,X_bml, & +Nocc,Znuc,m_rec,threshold,beta,diagonal) + +use bml + +implicit none +integer, parameter :: PREC = 8, dp = kind(1.0d0) +integer, intent(in) :: Nr_atoms, HDIM, Nocc, Max_Nr_Neigh,m_rec +real(PREC), parameter :: ONE = 1.D0, TWO = 2.D0, ZERO = 0.D0 +real(PREC), parameter :: kB = 8.61739d-5 ! eV/K, kB = 6.33366256e-6 Ry/K, kB = 3.166811429e-6 Ha/K +real(PREC), intent(in) :: Coulomb_acc, TIMERATIO,threshold,beta +real(PREC) :: v(Nr_atoms) +real(PREC), intent(in) :: RX(Nr_atoms), RY(Nr_atoms), RZ(Nr_atoms), LBox(3) +integer, intent(in) :: H_INDEX_START(Nr_atoms), H_INDEX_END(Nr_atoms) +real(PREC), intent(in) :: Znuc(Nr_atoms), Hubbard_U(Nr_atoms) +type(bml_matrix_t), intent(in) :: Inv_bml(m_rec) +type(bml_matrix_t), intent(in) :: S_bml,Z_bml,HO_bml +type(bml_matrix_t),intent(inout) :: H1_bml,DO_bml,D1_bml,Y_bml,X_bml +real(PREC), intent(inout) :: mu0, diagonal(HDIM) +character(10), intent(in) :: Element_Type(Nr_atoms) +integer, intent(in) :: nrnnlist(Nr_atoms), nnType(Nr_atoms,Max_Nr_Neigh) +real(PREC), intent(in) :: nnRx(Nr_atoms,Max_Nr_Neigh) +real(PREC), intent(in) :: nnRy(Nr_atoms,Max_Nr_Neigh), nnRz(Nr_atoms,Max_Nr_Neigh) +real(PREC) :: Coulomb_Pot_Real(Nr_atoms), Coulomb_Pot(Nr_atoms) +real(PREC) :: Coulomb_Pot_Real_I, Coulomb_Pot_k(Nr_atoms) +real(PREC) :: Coulomb_Pot_Real_dq_v(Nr_atoms), Coulomb_Pot_dq_v(Nr_atoms) +real(PREC) :: Coulomb_Force_Real_I(3),Coulomb_Force_k(3,Nr_atoms) +real(PREC) :: dq_v(Nr_atoms) +real(PREC) :: dq_dv(Nr_atoms) +real(PREC), intent(out) :: KK(Nr_atoms,Nr_atoms) +integer :: I,J,K, ITER, mm,It +real(PREC),allocatable :: row1(:),row2(:),JJ(:,:) + + allocate(row1(HDIM)); allocate(row2(HDIM)); allocate(JJ(Nr_atoms,Nr_atoms)) + Coulomb_Pot_dq_v = ZERO + Coulomb_Pot_k = ZERO + dq_v = ZERO + JJ = ZERO + KK = ZERO + + do J = 1,Nr_atoms + dq_v(J) = ONE + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(I,Coulomb_Pot_Real_I) + do I = 1,Nr_atoms + call Ewald_Real_Space_Single(Coulomb_Pot_Real_I,Coulomb_Force_Real_I,I,RX,RY,RZ,LBox, & + dq_v,J,Hubbard_U,Element_Type,Nr_atoms,Coulomb_acc,TIMERATIO,HDIM,Max_Nr_Neigh) + Coulomb_Pot_Real(I) = Coulomb_Pot_Real_I + enddo + !$OMP END PARALLEL DO + call Ewald_k_Space(Coulomb_Pot_k,Coulomb_Force_k,RX,RY,RZ,LBox,dq_v,Nr_atoms,Coulomb_acc, & + TIMERATIO,Max_Nr_Neigh) + Coulomb_Pot_dq_v = Coulomb_Pot_Real+Coulomb_Pot_k + + diagonal = 0.0_dp + do I = 1,HDIM + call bml_set_row(H1_bml,I,diagonal,threshold) + enddo + + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(I,K) + do I = 1,Nr_atoms + do K = H_INDEX_START(I),H_INDEX_END(I) + diagonal(K) = Hubbard_U(I)*dq_v(I) + Coulomb_Pot_dq_v(I) + enddo + enddo + !$OMP END PARALLEL DO + + call bml_set_diagonal(H1_bml,diagonal,threshold) + call bml_multiply(S_bml,H1_bml,D1_bml,1.0_dp,0.0_dp,threshold) + call bml_multiply(H1_bml,S_bml,D1_bml,0.5_dp,0.5_dp,threshold) + call bml_copy(D1_bml,H1_bml) + + call bml_transpose(Z_bml,X_bml) + call bml_multiply(X_bml,H1_bml,D1_bml,1.0_dp,0.0_dp,threshold) + call bml_multiply(D1_bml,Z_bml,H1_bml,1.0_dp,0.0_dp,threshold) + + call prg_implicit_fermi_first_order_response(HO_bml,H1_bml,DO_bml,D1_bml,Inv_bml, & + m_rec,mu0,beta,real(nocc,PREC),threshold) + + call bml_transpose(Z_bml,X_bml) + call bml_multiply(Z_bml,D1_bml,Y_bml,2.0_dp,0.0_dp,threshold) + call bml_multiply(Y_bml,X_bml,D1_bml,1.0_dp,0.0_dp,threshold) + + do It = 1, Nr_atoms + dq_dv(It) = 0 + do K = H_INDEX_START(It), H_INDEX_END(It) + call bml_get_row(S_bml,K,row1) + call bml_get_row(D1_bml,K,row2) + dq_dv(It) = dq_dv(It) + dot_product(row1,row2) + enddo + JJ(It,J) = dq_dv(It) + enddo + dq_v = ZERO + enddo + + do I = 1,Nr_atoms + JJ(I,I) = JJ(I,I) - ONE + enddo + call Invert(JJ,KK,Nr_atoms) + deallocate(row1); deallocate(row2); deallocate(JJ) + +end subroutine prg_full_kernel + + +! Compute the low-rank kernel matrix. (For development code) + subroutine prg_kernel_matrix_multirank(KRes,KK0_bml,Res,FelTol,L,LMAX,HO_bml,mu,beta,RX,RY,RZ,LBox,Hubbard_U,Element_Type, & + Nr_atoms,HDIM,Max_Nr_Neigh,Coulomb_acc,TIMERATIO,nnRx,nnRy,nnRz,nrnnlist,nnType,H_INDEX_START,H_INDEX_END, & + S_bml,Z_bml,Nocc,Znuc,Inv_bml,H1_bml,X_bml,Y_bml,DO_bml,D1_bml,m_rec,threshold) + +!! Res = q[n] - n +!! KK0 is preconditioner +!! KRes rank-L approximation of (K0*J)^(-1)*K0*(q[n]-n) with (K0*J)^(-1) as in Eq. (41) in Ref. [*] + + implicit none + integer, intent(in) :: Nr_atoms, HDIM, Nocc,Max_Nr_Neigh,m_rec + real(dp), parameter :: ONE = 1.D0, TWO = 2.D0, ZERO = 0.D0 + real(dp), intent(in) :: Coulomb_acc, TIMERATIO, FelTol, beta, mu + real(dp), intent(in) :: RX(Nr_atoms), RY(Nr_atoms), RZ(Nr_atoms), LBox(3) + real(dp), intent(in) :: Res(Nr_atoms) + integer, intent(in) :: H_INDEX_START(Nr_atoms), H_INDEX_END(Nr_atoms) + real(dp), intent(in) :: Znuc(Nr_atoms), Hubbard_U(Nr_atoms) + type(bml_matrix_t), intent(in) :: HO_bml, S_bml, Z_bml, Inv_bml(m_rec) + real(dp) :: K0Res(Nr_atoms) + type(bml_matrix_t),intent(inout) :: H1_bml, X_bml,Y_bml,DO_bml,D1_bml + type(bml_matrix_t),intent(inout) :: KK0_bml + real(dp), intent(in) :: threshold + integer, intent(in) :: LMAX + character(10), intent(in) :: Element_Type(Nr_atoms) + integer, intent(in) :: nrnnlist(Nr_atoms), nnType(Nr_atoms,Max_Nr_Neigh) + real(dp), intent(in) :: nnRx(Nr_atoms,Max_Nr_Neigh) + real(dp), intent(in) :: nnRy(Nr_atoms,Max_Nr_Neigh), nnRz(Nr_atoms,Max_Nr_Neigh) + real(dp) :: Coulomb_Pot_Real(Nr_atoms), Coulomb_Pot(Nr_atoms) + real(dp) :: Coulomb_Pot_Real_I, Coulomb_Pot_k(Nr_atoms),dq_dv(Nr_atoms) + real(dp) :: Coulomb_Pot_Real_dq_v(Nr_atoms), Coulomb_Pot_dq_v(Nr_atoms) + real(dp) :: Coulomb_Force_Real_I(3),Coulomb_Force_k(3,Nr_atoms) + real(dp) :: dq_v(Nr_atoms) + real(dp), intent(out) :: KRes(Nr_atoms) + integer :: I,J,K,It,col,row + integer, intent(out) :: L + real(dp) :: Fel, proj_tmp,elem + real(dp) :: vi(Nr_atoms,LMAX), fi(Nr_atoms,LMAX), v(Nr_atoms) + real(dp) :: dr(Nr_Atoms), IdentRes(Nr_atoms) + real(dp), allocatable :: O(:,:),M(:,:),row1(:),row2(:) + + allocate(row1(HDIM));allocate(row2(HDIM)); + + dr = Res + I = 0 + Fel = 1.D0 + do while ((Fel > FelTol).AND.(I < (LMAX))) !! Fel = "Error" in Swedish + I = I + 1 + vi(:,I) = dr/norm2(dr) + do J = 1,I-1 + vi(:,I) = vi(:,I) - dot_product(vi(:,I),vi(:,J))*vi(:,J) !! Orthogonalized v_i as in Eq. (42) Ref. [*] + enddo + vi(:,I) = vi(:,I)/norm2(vi(:,I)) + v(:) = vi(:,I) ! v_i + + dq_v = v + + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(I,Coulomb_Pot_Real_I,Coulomb_Force_Real_I) + do J = 1,Nr_atoms + call Ewald_Real_Space(Coulomb_Pot_Real_I,Coulomb_Force_Real_I,J,RX,RY,RZ,LBox, & + dq_v,Hubbard_U,Element_Type,Nr_atoms,Coulomb_acc,TIMERATIO,nnRx,nnRy,nnRz,nrnnlist,nnType,HDIM,Max_Nr_Neigh) + Coulomb_Pot_Real(J) = Coulomb_Pot_Real_I + enddo + !$OMP END PARALLEL DO + + call Ewald_k_Space(Coulomb_Pot_k,Coulomb_Force_k,RX,RY,RZ,LBox,dq_v,Nr_atoms,Coulomb_acc,TIMERATIO,Max_Nr_Neigh) + Coulomb_Pot_dq_v = Coulomb_Pot_Real+Coulomb_Pot_k + + row1 = 0.0_dp + do J = 1,HDIM + call bml_set_row(H1_bml,J,row1,threshold) + enddo + + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(J,K) + do J = 1,Nr_atoms + do K = H_INDEX_START(J),H_INDEX_END(J) + row1(K) = Hubbard_U(J)*dq_v(J) + Coulomb_Pot_dq_v(J) + enddo + enddo + !$OMP END PARALLEL DO + + call bml_set_diagonal(H1_bml,row1,threshold) + call bml_multiply(S_bml,H1_bml,D1_bml,1.0_dp,0.0_dp,threshold) + call bml_multiply(H1_bml,S_bml,D1_bml,0.5_dp,0.5_dp,threshold) + call bml_copy(D1_bml,H1_bml) + + call bml_transpose(Z_bml,X_bml) + call bml_multiply(X_bml,H1_bml,D1_bml,1.0_dp,0.0_dp,threshold) + call bml_multiply(D1_bml,Z_bml,H1_bml,1.0_dp,0.0_dp,threshold) + + call prg_implicit_fermi_first_order_response(HO_bml,H1_bml,DO_bml,D1_bml,Inv_bml, & + m_rec, mu, beta, real(nocc,dp), threshold) + + call bml_transpose(Z_bml,X_bml) + call bml_multiply(Z_bml,D1_bml,Y_bml,2.0_dp,0.0_dp,threshold) + call bml_multiply(Y_bml,X_bml,D1_bml,1.0_dp,0.0_dp,threshold) + + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(It,K,row1,row2) + do It = 1, Nr_atoms + dq_dv(It) = 0 + do K = H_INDEX_START(It), H_INDEX_END(It) + call bml_get_row(S_bml,K,row1) + call bml_get_row(D1_bml,K,row2) + dq_dv(It) = dq_dv(It) + dot_product(row1,row2) + enddo + enddo + !$OMP END PARALLEL DO + + dr = dq_dv - v + fi(:,I) = dr + + L = I + allocate(O(L,L), M(L,L)) + do K = 1,L + do J = 1,L + O(K,J) = dot_product(fi(:,K),fi(:,J)) ! O_KJ = < fv_i(K) | fv_i(J) > see below Eq. (31) + enddo + enddo + call Invert(O,M,L) ! M = O^(-1) + IdentRes = 0.D0*Res + KRes = 0.D0 + do K = 1,L + do J = 1,L + proj_tmp = M(K,J)*dot_product(fi(:,J),Res) + IdentRes = IdentRes + proj_tmp*fi(:,K) + KRes = KRes + proj_tmp*vi(:,K) !! KRes becomes the rank-L approximate of (J)^(-1)*(q[n]-n) + enddo + enddo + Fel = norm2(IdentRes-Res)/norm2(IdentRes) !! RELATIVE RESIDUAL ERROR ESTIMATE Eq. (48) Ref. [*] + write(*,*) '# I, L, Fel = ',I,L,Fel !! Fel goes down with L + + ! Does not work, need to normalize matrix + if ((Fel > FelTol).AND.(I < (LMAX))) then + deallocate(O, M) + else + do row = 1,Nr_atoms + do col = 1,Nr_atoms + elem = 0.0 + do K = 1,L + do J = 1,L + elem = elem + M(J,K)*vi(row,J)*fi(col,K) + enddo + enddo + if (abs(elem) > threshold) then + call bml_set_element(KK0_bml,row,col,elem) + endif + enddo + enddo + deallocate(O,M) + endif + enddo + deallocate(row1); deallocate(row2); + + end subroutine prg_kernel_matrix_multirank + +end module prg_xlbokernel_mod From bf9a36831227d21621931f9c7ae86ef625d412e5 Mon Sep 17 00:00:00 2001 From: Linnea Andersson Date: Wed, 12 May 2021 19:34:44 +0200 Subject: [PATCH 06/10] O(N) pre-conditioned kernel matrix with implicit FD-expansion response --- src/prg_ewald_mod.F90 | 1386 ++++++++++++++--------------- src/prg_implicit_fermi_mod.F90 | 210 ++--- src/prg_xlbokernel_mod.F90 | 1505 ++++++++++++++++---------------- tests/CMakeLists.txt | 1 + tests/src/main.F90 | 47 +- 5 files changed, 1595 insertions(+), 1554 deletions(-) diff --git a/src/prg_ewald_mod.F90 b/src/prg_ewald_mod.F90 index 5a032412..95127797 100644 --- a/src/prg_ewald_mod.F90 +++ b/src/prg_ewald_mod.F90 @@ -1,4 +1,4 @@ -! Ewald sum routines for kernel calculation +! Ewald sum routines for kernel calculation module prg_ewald_mod use bml @@ -20,550 +20,550 @@ module prg_ewald_mod contains -!> Find Coulomb potential on site I from single charge at site J -subroutine Ewald_Real_Space_Single_latte(COULOMBV,I,RXYZ,Box,Nr_elem, & - DELTAQ,J,U,Element_Pointer,Nr_atoms,COULACC,HDIM,Max_Nr_Neigh) - -implicit none - -integer, parameter :: PREC = 8 -real(PREC), parameter :: ONE = 1.D0, TWO = 2.D0, ZERO = 0.D0, SIX = 6.D0, THREE = 3.D0 -real(PREC), parameter :: FOURTYEIGHT = 48.D0, ELEVEN = 11.D0, SIXTEEN = 16.D0 -real(PREC), parameter :: pi = 3.14159265358979323846264D0 -real(PREC), parameter :: SQRTPI = 1.772453850905516D0 -integer, intent(in) :: Nr_atoms, Nr_elem, HDIM, Max_Nr_Neigh, I, J, Element_Pointer(Nr_atoms) -real(PREC), intent(in) :: COULACC, DELTAQ(Nr_atoms) -real(PREC) :: TFACT, RELPERM, KECONST -real(PREC), intent(in) :: RXYZ(3,Nr_atoms), Box(3,3) -real(PREC), intent(in) :: U(Nr_elem) -real(PREC) :: COULCUT, COULCUT2 -real(PREC), intent(out) :: COULOMBV -real(PREC) :: Ra(3), Rb(3), dR, Rab(3) -real(PREC) :: COULVOL, SQRTX, CALPHA, DC(3), MAGR, MAGR2, Z -real(PREC) :: CALPHA2, TI,TI2,TI3,TI4,TI6,SSA,SSB,SSC,SSD,SSE -real(PREC) :: NUMREP_ERFC, CA, FORCE, EXPTI, EXPTJ -real(PREC) :: TJ,TJ2,TJ3,TJ4,TJ6,TI2MTJ2A,SA,SB,SC,SD,SE,SF -real(PREC) :: TI2MTJ2, TI2MTI2, TJ2MTI2 - -integer :: K, ccnt,l,m,n - -COULVOL = Box(1,1)*Box(2,2)*Box(3,3) -SQRTX = sqrt(-log(COULACC)) - -ccnt = 0 -COULCUT = 12.D0 -CALPHA = SQRTX/COULCUT -COULCUT2 = COULCUT*COULCUT -CALPHA2 = CALPHA*CALPHA - -RELPERM = ONE -KECONST = 14.3996437701414D0*RELPERM -TFACT = 16.0D0/(5.0D0*KECONST) - -COULOMBV = ZERO - -TI = TFACT*U(Element_Pointer(I)) -TI2 = TI*TI -TI3 = TI2*TI -TI4 = TI2*TI2 -TI6 = TI4*TI2 - -SSA = TI -SSB = TI3/48.D0 -SSC = 3.D0*TI2/16.D0 -SSD = 11.D0*TI/16.D0 -SSE = 1.D0 - -Ra(1) = RXYZ(1,I) -Ra(2) = RXYZ(2,I) -Ra(3) = RXYZ(3,I) + !> Find Coulomb potential on site I from single charge at site J + subroutine Ewald_Real_Space_Single_latte(COULOMBV,I,RXYZ,Box,Nr_elem, & + DELTAQ,J,U,Element_Pointer,Nr_atoms,COULACC,HDIM,Max_Nr_Neigh) + + implicit none + + integer, parameter :: PREC = 8 + real(PREC), parameter :: ONE = 1.D0, TWO = 2.D0, ZERO = 0.D0, SIX = 6.D0, THREE = 3.D0 + real(PREC), parameter :: FOURTYEIGHT = 48.D0, ELEVEN = 11.D0, SIXTEEN = 16.D0 + real(PREC), parameter :: pi = 3.14159265358979323846264D0 + real(PREC), parameter :: SQRTPI = 1.772453850905516D0 + integer, intent(in) :: Nr_atoms, Nr_elem, HDIM, Max_Nr_Neigh, I, J, Element_Pointer(Nr_atoms) + real(PREC), intent(in) :: COULACC, DELTAQ(Nr_atoms) + real(PREC) :: TFACT, RELPERM, KECONST + real(PREC), intent(in) :: RXYZ(3,Nr_atoms), Box(3,3) + real(PREC), intent(in) :: U(Nr_elem) + real(PREC) :: COULCUT, COULCUT2 + real(PREC), intent(out) :: COULOMBV + real(PREC) :: Ra(3), Rb(3), dR, Rab(3) + real(PREC) :: COULVOL, SQRTX, CALPHA, DC(3), MAGR, MAGR2, Z + real(PREC) :: CALPHA2, TI,TI2,TI3,TI4,TI6,SSA,SSB,SSC,SSD,SSE + real(PREC) :: NUMREP_ERFC, CA, FORCE, EXPTI, EXPTJ + real(PREC) :: TJ,TJ2,TJ3,TJ4,TJ6,TI2MTJ2A,SA,SB,SC,SD,SE,SF + real(PREC) :: TI2MTJ2, TI2MTI2, TJ2MTI2 + + integer :: K, ccnt,l,m,n + + COULVOL = Box(1,1)*Box(2,2)*Box(3,3) + SQRTX = sqrt(-log(COULACC)) + + ccnt = 0 + COULCUT = 12.D0 + CALPHA = SQRTX/COULCUT + COULCUT2 = COULCUT*COULCUT + CALPHA2 = CALPHA*CALPHA + + RELPERM = ONE + KECONST = 14.3996437701414D0*RELPERM + TFACT = 16.0D0/(5.0D0*KECONST) + + COULOMBV = ZERO + + TI = TFACT*U(Element_Pointer(I)) + TI2 = TI*TI + TI3 = TI2*TI + TI4 = TI2*TI2 + TI6 = TI4*TI2 + + SSA = TI + SSB = TI3/48.D0 + SSC = 3.D0*TI2/16.D0 + SSD = 11.D0*TI/16.D0 + SSE = 1.D0 + + Ra(1) = RXYZ(1,I) + Ra(2) = RXYZ(2,I) + Ra(3) = RXYZ(3,I) do k = -1,1 - do m = -1,1 - do l = -1,1 - - Rb(1) = RXYZ(1,J)+k*box(1,1) - Rb(2) = RXYZ(2,J)+m*box(2,2) - Rb(3) = RXYZ(3,J)+l*box(3,3) - Rab = Rb-Ra ! OBS b - a !!! - dR = norm2(Rab) - MAGR = dR - MAGR2 = dR*dR - - if ((dR <= COULCUT).and.(dR > 1e-12)) then - - TJ = TFACT*U(Element_Pointer(J)) - DC = Rab/dR - - Z = abs(CALPHA*MAGR) - NUMREP_ERFC = erfc(Z) - - CA = NUMREP_ERFC/MAGR - COULOMBV = COULOMBV + DELTAQ(J)*CA - ccnt = ccnt + 1 - CA = CA + TWO*CALPHA*exp( -CALPHA2*MAGR2 )/SQRTPI - EXPTI = exp(-TI*MAGR ) - - if (Element_Pointer(I).eq.Element_Pointer(J)) then - COULOMBV = COULOMBV - DELTAQ(J)*EXPTI*(SSB*MAGR2 + SSC*MAGR + SSD + SSE/MAGR) - ccnt = ccnt + 1 - else - TJ2 = TJ*TJ - TJ3 = TJ2*TJ - TJ4 = TJ2*TJ2 - TJ6 = TJ4*TJ2 - EXPTJ = exp( -TJ*MAGR ) - TI2MTJ2 = TI2 - TJ2 - TJ2MTI2 = -TI2MTJ2 - SA = TI - SB = TJ4*TI/(TWO*TI2MTJ2*TI2MTJ2) - SC = (TJ6 - THREE*TJ4*TI2)/(TI2MTJ2*TI2MTJ2*TI2MTJ2) - SD = TJ - SE = TI4*TJ/(TWO * TJ2MTI2 * TJ2MTI2) - SF = (TI6 - THREE*TI4*TJ2)/(TJ2MTI2*TJ2MTI2*TJ2MTI2) - - COULOMBV = COULOMBV - (DELTAQ(J)*(EXPTI*(SB - (SC/MAGR)) + EXPTJ*(SE - (SF/MAGR)))) - endif - endif - enddo - enddo + do m = -1,1 + do l = -1,1 + + Rb(1) = RXYZ(1,J)+k*box(1,1) + Rb(2) = RXYZ(2,J)+m*box(2,2) + Rb(3) = RXYZ(3,J)+l*box(3,3) + Rab = Rb-Ra ! OBS b - a !!! + dR = norm2(Rab) + MAGR = dR + MAGR2 = dR*dR + + if ((dR <= COULCUT).and.(dR > 1e-12)) then + + TJ = TFACT*U(Element_Pointer(J)) + DC = Rab/dR + + Z = abs(CALPHA*MAGR) + NUMREP_ERFC = erfc(Z) + + CA = NUMREP_ERFC/MAGR + COULOMBV = COULOMBV + DELTAQ(J)*CA + ccnt = ccnt + 1 + CA = CA + TWO*CALPHA*exp( -CALPHA2*MAGR2 )/SQRTPI + EXPTI = exp(-TI*MAGR ) + + if (Element_Pointer(I).eq.Element_Pointer(J)) then + COULOMBV = COULOMBV - DELTAQ(J)*EXPTI*(SSB*MAGR2 + SSC*MAGR + SSD + SSE/MAGR) + ccnt = ccnt + 1 + else + TJ2 = TJ*TJ + TJ3 = TJ2*TJ + TJ4 = TJ2*TJ2 + TJ6 = TJ4*TJ2 + EXPTJ = exp( -TJ*MAGR ) + TI2MTJ2 = TI2 - TJ2 + TJ2MTI2 = -TI2MTJ2 + SA = TI + SB = TJ4*TI/(TWO*TI2MTJ2*TI2MTJ2) + SC = (TJ6 - THREE*TJ4*TI2)/(TI2MTJ2*TI2MTJ2*TI2MTJ2) + SD = TJ + SE = TI4*TJ/(TWO * TJ2MTI2 * TJ2MTI2) + SF = (TI6 - THREE*TI4*TJ2)/(TJ2MTI2*TJ2MTI2*TJ2MTI2) + + COULOMBV = COULOMBV - (DELTAQ(J)*(EXPTI*(SB - (SC/MAGR)) + EXPTJ*(SE - (SF/MAGR)))) + endif + endif + enddo + enddo enddo -COULOMBV = KECONST*COULOMBV - -end subroutine Ewald_Real_Space_Single_latte - -subroutine Ewald_Real_Space_Single(COULOMBV,FCOUL,I,RX,RY,RZ,LBox, & - DELTAQ,J,U,Element_Type,Nr_atoms,COULACC,TIMERATIO,HDIM,Max_Nr_Neigh) - - -implicit none - -integer, parameter :: PREC = 8 -real(PREC), parameter :: ONE = 1.D0, TWO = 2.D0, ZERO = 0.D0, SIX = 6.D0, THREE = 3.D0 -real(PREC), parameter :: FOURTYEIGHT = 48.D0, ELEVEN = 11.D0, SIXTEEN = 16.D0 -real(PREC), parameter :: pi = 3.14159265358979323846264D0 -real(PREC), parameter :: SQRTPI = 1.772453850905516D0 -integer, intent(in) :: Nr_atoms, HDIM, Max_Nr_Neigh, I, J -real(PREC), intent(in) :: COULACC, TIMERATIO,DELTAQ(Nr_atoms) -real(PREC) :: TFACT, RELPERM, KECONST -real(PREC), intent(in) :: RX(Nr_atoms), RY(Nr_atoms), RZ(Nr_atoms), LBox(3) -real(PREC), intent(in) :: U(Nr_atoms) -real(PREC) :: COULCUT, COULCUT2 -character(10), intent(in) :: Element_Type(Nr_atoms) -real(PREC), intent(out) :: COULOMBV, FCOUL(3) -real(PREC) :: Ra(3), Rb(3), dR, Rab(3) -real(PREC) :: COULVOL, SQRTX, CALPHA, DC(3), MAGR, MAGR2, Z -real(PREC) :: CALPHA2, TI,TI2,TI3,TI4,TI6,SSA,SSB,SSC,SSD,SSE -real(PREC) :: NUMREP_ERFC, CA, FORCE, EXPTI, EXPTJ -real(PREC) :: TJ,TJ2,TJ3,TJ4,TJ6,TI2MTJ2A,SA,SB,SC,SD,SE,SF -real(PREC) :: TI2MTJ2, TI2MTI2, TJ2MTI2 - -integer :: K, ccnt,l,m,n - -COULVOL = LBox(1)*LBox(2)*LBox(3) -SQRTX = sqrt(-log(COULACC)) - -ccnt = 0 -COULCUT = 12.D0 -CALPHA = SQRTX/COULCUT -COULCUT2 = COULCUT*COULCUT -CALPHA2 = CALPHA*CALPHA - -RELPERM = ONE -KECONST = 14.3996437701414D0*RELPERM -TFACT = 16.0D0/(5.0D0*KECONST) - -FCOUL = ZERO -COULOMBV = ZERO - -TI = TFACT*U(I) -TI2 = TI*TI -TI3 = TI2*TI -TI4 = TI2*TI2 -TI6 = TI4*TI2 - -SSA = TI -SSB = TI3/48.D0 -SSC = 3.D0*TI2/16.D0 -SSD = 11.D0*TI/16.D0 -SSE = 1.D0 - -Ra(1) = RX(I) -Ra(2) = RY(I) -Ra(3) = RZ(I) + COULOMBV = KECONST*COULOMBV + + end subroutine Ewald_Real_Space_Single_latte + + subroutine Ewald_Real_Space_Single(COULOMBV,FCOUL,I,RX,RY,RZ,LBox, & + DELTAQ,J,U,Element_Type,Nr_atoms,COULACC,TIMERATIO,HDIM,Max_Nr_Neigh) + + + implicit none + + integer, parameter :: PREC = 8 + real(PREC), parameter :: ONE = 1.D0, TWO = 2.D0, ZERO = 0.D0, SIX = 6.D0, THREE = 3.D0 + real(PREC), parameter :: FOURTYEIGHT = 48.D0, ELEVEN = 11.D0, SIXTEEN = 16.D0 + real(PREC), parameter :: pi = 3.14159265358979323846264D0 + real(PREC), parameter :: SQRTPI = 1.772453850905516D0 + integer, intent(in) :: Nr_atoms, HDIM, Max_Nr_Neigh, I, J + real(PREC), intent(in) :: COULACC, TIMERATIO,DELTAQ(Nr_atoms) + real(PREC) :: TFACT, RELPERM, KECONST + real(PREC), intent(in) :: RX(Nr_atoms), RY(Nr_atoms), RZ(Nr_atoms), LBox(3) + real(PREC), intent(in) :: U(Nr_atoms) + real(PREC) :: COULCUT, COULCUT2 + character(10), intent(in) :: Element_Type(Nr_atoms) + real(PREC), intent(out) :: COULOMBV, FCOUL(3) + real(PREC) :: Ra(3), Rb(3), dR, Rab(3) + real(PREC) :: COULVOL, SQRTX, CALPHA, DC(3), MAGR, MAGR2, Z + real(PREC) :: CALPHA2, TI,TI2,TI3,TI4,TI6,SSA,SSB,SSC,SSD,SSE + real(PREC) :: NUMREP_ERFC, CA, FORCE, EXPTI, EXPTJ + real(PREC) :: TJ,TJ2,TJ3,TJ4,TJ6,TI2MTJ2A,SA,SB,SC,SD,SE,SF + real(PREC) :: TI2MTJ2, TI2MTI2, TJ2MTI2 + + integer :: K, ccnt,l,m,n + + COULVOL = LBox(1)*LBox(2)*LBox(3) + SQRTX = sqrt(-log(COULACC)) + + ccnt = 0 + COULCUT = 12.D0 + CALPHA = SQRTX/COULCUT + COULCUT2 = COULCUT*COULCUT + CALPHA2 = CALPHA*CALPHA + + RELPERM = ONE + KECONST = 14.3996437701414D0*RELPERM + TFACT = 16.0D0/(5.0D0*KECONST) + + FCOUL = ZERO + COULOMBV = ZERO + + TI = TFACT*U(I) + TI2 = TI*TI + TI3 = TI2*TI + TI4 = TI2*TI2 + TI6 = TI4*TI2 + + SSA = TI + SSB = TI3/48.D0 + SSC = 3.D0*TI2/16.D0 + SSD = 11.D0*TI/16.D0 + SSE = 1.D0 + + Ra(1) = RX(I) + Ra(2) = RY(I) + Ra(3) = RZ(I) do k = -1,1 - do m = -1,1 - do l = -1,1 - - Rb(1) = RX(J)+k*Lbox(1) - Rb(2) = RY(J)+m*Lbox(2) - Rb(3) = RZ(J)+l*Lbox(3) - Rab = Rb-Ra ! OBS b - a !!! - dR = norm2(Rab) - MAGR = dR - MAGR2 = dR*dR - - if ((dR <= COULCUT).and.(dR > 1e-12)) then - - TJ = TFACT*U(J) - DC = Rab/dR - - Z = abs(CALPHA*MAGR) - NUMREP_ERFC = erfc(Z) - - CA = NUMREP_ERFC/MAGR - COULOMBV = COULOMBV + DELTAQ(J)*CA - ccnt = ccnt + 1 - CA = CA + TWO*CALPHA*exp( -CALPHA2*MAGR2 )/SQRTPI - FORCE = -KECONST*DELTAQ(I)*DELTAQ(J)*CA/MAGR - EXPTI = exp(-TI*MAGR ) - - if (Element_Type(I).eq.Element_Type(J)) then - COULOMBV = COULOMBV - DELTAQ(J)*EXPTI*(SSB*MAGR2 + SSC*MAGR + SSD + SSE/MAGR) - ccnt = ccnt + 1 - FORCE = FORCE + (KECONST*DELTAQ(I)*DELTAQ(J)*EXPTI)*((SSE/MAGR2 - TWO*SSB*MAGR - SSC) & - + SSA*(SSB*MAGR2 + SSC*MAGR + SSD + SSE/MAGR)) - else - TJ2 = TJ*TJ - TJ3 = TJ2*TJ - TJ4 = TJ2*TJ2 - TJ6 = TJ4*TJ2 - EXPTJ = exp( -TJ*MAGR ) - TI2MTJ2 = TI2 - TJ2 - TJ2MTI2 = -TI2MTJ2 - SA = TI - SB = TJ4*TI/(TWO*TI2MTJ2*TI2MTJ2) - SC = (TJ6 - THREE*TJ4*TI2)/(TI2MTJ2*TI2MTJ2*TI2MTJ2) - SD = TJ - SE = TI4*TJ/(TWO * TJ2MTI2 * TJ2MTI2) - SF = (TI6 - THREE*TI4*TJ2)/(TJ2MTI2*TJ2MTI2*TJ2MTI2) - - COULOMBV = COULOMBV - (DELTAQ(J)*(EXPTI*(SB - (SC/MAGR)) + EXPTJ*(SE - (SF/MAGR)))) - FORCE = FORCE + KECONST*DELTAQ(I)*DELTAQ(J)*((EXPTI*(SA*(SB - (SC/MAGR)) - (SC/MAGR2))) & - + (EXPTJ*(SD*(SE - (SF/MAGR)) - (SF/MAGR2)))) - endif - - FCOUL(1) = FCOUL(1) + DC(1)*FORCE - FCOUL(2) = FCOUL(2) + DC(2)*FORCE - FCOUL(3) = FCOUL(3) + DC(3)*FORCE - endif - enddo + do m = -1,1 + do l = -1,1 + + Rb(1) = RX(J)+k*Lbox(1) + Rb(2) = RY(J)+m*Lbox(2) + Rb(3) = RZ(J)+l*Lbox(3) + Rab = Rb-Ra ! OBS b - a !!! + dR = norm2(Rab) + MAGR = dR + MAGR2 = dR*dR + + if ((dR <= COULCUT).and.(dR > 1e-12)) then + + TJ = TFACT*U(J) + DC = Rab/dR + + Z = abs(CALPHA*MAGR) + NUMREP_ERFC = erfc(Z) + + CA = NUMREP_ERFC/MAGR + COULOMBV = COULOMBV + DELTAQ(J)*CA + ccnt = ccnt + 1 + CA = CA + TWO*CALPHA*exp( -CALPHA2*MAGR2 )/SQRTPI + FORCE = -KECONST*DELTAQ(I)*DELTAQ(J)*CA/MAGR + EXPTI = exp(-TI*MAGR ) + + if (Element_Type(I).eq.Element_Type(J)) then + COULOMBV = COULOMBV - DELTAQ(J)*EXPTI*(SSB*MAGR2 + SSC*MAGR + SSD + SSE/MAGR) + ccnt = ccnt + 1 + FORCE = FORCE + (KECONST*DELTAQ(I)*DELTAQ(J)*EXPTI)*((SSE/MAGR2 - TWO*SSB*MAGR - SSC) & + + SSA*(SSB*MAGR2 + SSC*MAGR + SSD + SSE/MAGR)) + else + TJ2 = TJ*TJ + TJ3 = TJ2*TJ + TJ4 = TJ2*TJ2 + TJ6 = TJ4*TJ2 + EXPTJ = exp( -TJ*MAGR ) + TI2MTJ2 = TI2 - TJ2 + TJ2MTI2 = -TI2MTJ2 + SA = TI + SB = TJ4*TI/(TWO*TI2MTJ2*TI2MTJ2) + SC = (TJ6 - THREE*TJ4*TI2)/(TI2MTJ2*TI2MTJ2*TI2MTJ2) + SD = TJ + SE = TI4*TJ/(TWO * TJ2MTI2 * TJ2MTI2) + SF = (TI6 - THREE*TI4*TJ2)/(TJ2MTI2*TJ2MTI2*TJ2MTI2) + + COULOMBV = COULOMBV - (DELTAQ(J)*(EXPTI*(SB - (SC/MAGR)) + EXPTJ*(SE - (SF/MAGR)))) + FORCE = FORCE + KECONST*DELTAQ(I)*DELTAQ(J)*((EXPTI*(SA*(SB - (SC/MAGR)) - (SC/MAGR2))) & + + (EXPTJ*(SD*(SE - (SF/MAGR)) - (SF/MAGR2)))) + endif + + FCOUL(1) = FCOUL(1) + DC(1)*FORCE + FCOUL(2) = FCOUL(2) + DC(2)*FORCE + FCOUL(3) = FCOUL(3) + DC(3)*FORCE + endif + enddo + enddo enddo + + COULOMBV = KECONST*COULOMBV + + end subroutine Ewald_Real_Space_Single + + subroutine Ewald_Real_Space_latte(COULOMBV,I,RXYZ,Box, & + DELTAQ,U,Element_Pointer,Nr_atoms,COULACC,nebcoul,totnebcoul,HDIM,Max_Nr_Neigh,Nr_Elem) + + implicit none + + integer, parameter :: PREC = 8 + real(PREC), parameter :: ONE = 1.D0, TWO = 2.D0, ZERO = 0.D0, SIX = 6.D0, THREE = 3.D0 + real(PREC), parameter :: FOURTYEIGHT = 48.D0, ELEVEN = 11.D0, SIXTEEN = 16.D0 + real(PREC), parameter :: pi = 3.14159265358979323846264D0 + real(PREC), parameter :: SQRTPI = 1.772453850905516D0 + integer, intent(in) :: Nr_atoms, HDIM, Max_Nr_Neigh, I, Nr_Elem + real(PREC), intent(in) :: COULACC + real(PREC) :: TFACT, RELPERM, KECONST + real(PREC), intent(in) :: RXYZ(3,Nr_atoms), Box(3,3), DELTAQ(Nr_atoms) + real(PREC), intent(in) :: U(Nr_elem) + real(PREC) :: COULCUT, COULCUT2 + integer, intent(in) :: Element_Pointer(Nr_atoms) + integer, intent(in) :: totnebcoul(Nr_atoms), nebcoul(4,Max_Nr_Neigh,Nr_atoms) + real(PREC), intent(out) :: COULOMBV + real(PREC) :: Ra(3), Rb(3), dR, Rab(3) + real(PREC) :: COULVOL, SQRTX, CALPHA, DC(3), MAGR, MAGR2, Z + real(PREC) :: CALPHA2, TI,TI2,TI3,TI4,TI6,SSA,SSB,SSC,SSD,SSE + real(PREC) :: NUMREP_ERFC, CA, FORCE, EXPTI, EXPTJ + real(PREC) :: TJ,TJ2,TJ3,TJ4,TJ6,TI2MTJ2A,SA,SB,SC,SD,SE,SF + real(PREC) :: TI2MTJ2, TI2MTI2, TJ2MTI2 + integer :: J,K, ccnt, newj, PBCI,PBCJ,PBCK + + COULVOL = Box(1,1)*Box(2,2)*Box(3,3) + SQRTX = sqrt(-log(COULACC)) + + ccnt = 0 + COULCUT = 12.D0 + CALPHA = SQRTX/COULCUT + COULCUT2 = COULCUT*COULCUT + CALPHA2 = CALPHA*CALPHA + + RELPERM = ONE + KECONST = 14.3996437701414D0*RELPERM + TFACT = 16.0D0/(5.0D0*KECONST) + + COULOMBV = ZERO + + TI = TFACT*U(Element_Pointer(I)) + TI2 = TI*TI + TI3 = TI2*TI + TI4 = TI2*TI2 + TI6 = TI4*TI2 + + SSA = TI + SSB = TI3/48.D0 + SSC = 3.D0*TI2/16.D0 + SSD = 11.D0*TI/16.D0 + SSE = 1.D0 + + Ra(1) = RXYZ(1,I) + Ra(2) = RXYZ(2,I) + Ra(3) = RXYZ(3,I) + + do newj = 1,totnebcoul(I) + J = NEBCOUL(1, NEWJ, I) + PBCI = NEBCOUL(2, NEWJ, I) + PBCJ = NEBCOUL(3, NEWJ, I) + PBCK = NEBCOUL(4, NEWJ, I) + Rb(1) = RXYZ(1,J) + REAL(PBCI)*BOX(1,1) + REAL(PBCJ)*BOX(2,1) + & + REAL(PBCK)*BOX(3,1) + + Rb(2) = RXYZ(2,J) + REAL(PBCI)*BOX(1,2) + REAL(PBCJ)*BOX(2,2) + & + REAL(PBCK)*BOX(3,2) + + Rb(3) = RXYZ(3,J) + REAL(PBCI)*BOX(1,3) + REAL(PBCJ)*BOX(2,3) + & + REAL(PBCK)*BOX(3,3) + Rab = Rb-Ra ! OBS b - a !!! + dR = norm2(Rab) + MAGR = dR + MAGR2 = dR*dR + + if ((dR <= COULCUT).and.(dR > 1e-12)) then + + TJ = TFACT*U(Element_Pointer(J)) + DC = Rab/dR + + Z = abs(CALPHA*MAGR) + NUMREP_ERFC = erfc(Z) + + CA = NUMREP_ERFC/MAGR + COULOMBV = COULOMBV + DELTAQ(J)*CA + ccnt = ccnt + 1 + CA = CA + TWO*CALPHA*exp( -CALPHA2*MAGR2 )/SQRTPI + EXPTI = exp(-TI*MAGR ) + + if (Element_Pointer(I).eq.Element_Pointer(J)) then + COULOMBV = COULOMBV - DELTAQ(J)*EXPTI*(SSB*MAGR2 + SSC*MAGR + SSD + SSE/MAGR) + ccnt = ccnt + 1 + else + TJ2 = TJ*TJ + TJ3 = TJ2*TJ + TJ4 = TJ2*TJ2 + TJ6 = TJ4*TJ2 + EXPTJ = exp( -TJ*MAGR ) + TI2MTJ2 = TI2 - TJ2 + TJ2MTI2 = -TI2MTJ2 + SA = TI + SB = TJ4*TI/(TWO*TI2MTJ2*TI2MTJ2) + SC = (TJ6 - THREE*TJ4*TI2)/(TI2MTJ2*TI2MTJ2*TI2MTJ2) + SD = TJ + SE = TI4*TJ/(TWO * TJ2MTI2 * TJ2MTI2) + SF = (TI6 - THREE*TI4*TJ2)/(TJ2MTI2*TJ2MTI2*TJ2MTI2) + + COULOMBV = COULOMBV - (DELTAQ(J)*(EXPTI*(SB - (SC/MAGR)) + EXPTJ*(SE - (SF/MAGR)))) + endif + + endif enddo + COULOMBV = KECONST*COULOMBV + + end subroutine Ewald_Real_Space_latte + + subroutine Ewald_Real_Space(COULOMBV,FCOUL,I,RX,RY,RZ,LBox, & + DELTAQ,U,Element_Type,Nr_atoms,COULACC,TIMERATIO,nnRx,nnRy,nnRz,nrnnlist,nnType,HDIM,Max_Nr_Neigh) + + implicit none + + integer, parameter :: PREC = 8 + real(PREC), parameter :: ONE = 1.D0, TWO = 2.D0, ZERO = 0.D0, SIX = 6.D0, THREE = 3.D0 + real(PREC), parameter :: FOURTYEIGHT = 48.D0, ELEVEN = 11.D0, SIXTEEN = 16.D0 + real(PREC), parameter :: pi = 3.14159265358979323846264D0 + real(PREC), parameter :: SQRTPI = 1.772453850905516D0 + integer, intent(in) :: Nr_atoms, HDIM, Max_Nr_Neigh, I + real(PREC), intent(in) :: COULACC, TIMERATIO + real(PREC) :: TFACT, RELPERM, KECONST + real(PREC), intent(in) :: RX(Nr_atoms), RY(Nr_atoms), RZ(Nr_atoms), LBox(3), DELTAQ(Nr_atoms) + real(PREC), intent(in) :: U(Nr_atoms) + real(PREC) :: COULCUT, COULCUT2 + character(10), intent(in) :: Element_Type(Nr_atoms) + integer, intent(in) :: nrnnlist(Nr_atoms), nnType(Nr_atoms,Max_Nr_Neigh) + real(PREC), intent(in) :: nnRx(Nr_atoms,Max_Nr_Neigh) + real(PREC), intent(in) :: nnRy(Nr_atoms,Max_Nr_Neigh), nnRz(Nr_atoms,Max_Nr_Neigh) + real(PREC), intent(out) :: COULOMBV, FCOUL(3) + real(PREC) :: Ra(3), Rb(3), dR, Rab(3) + real(PREC) :: COULVOL, SQRTX, CALPHA, DC(3), MAGR, MAGR2, Z + real(PREC) :: CALPHA2, TI,TI2,TI3,TI4,TI6,SSA,SSB,SSC,SSD,SSE + real(PREC) :: NUMREP_ERFC, CA, FORCE, EXPTI, EXPTJ + real(PREC) :: TJ,TJ2,TJ3,TJ4,TJ6,TI2MTJ2A,SA,SB,SC,SD,SE,SF + real(PREC) :: TI2MTJ2, TI2MTI2, TJ2MTI2 + integer :: J,K, ccnt, nnI + + COULVOL = LBox(1)*LBox(2)*LBox(3) + SQRTX = sqrt(-log(COULACC)) + + ccnt = 0 + COULCUT = 12.D0 + CALPHA = SQRTX/COULCUT + COULCUT2 = COULCUT*COULCUT + CALPHA2 = CALPHA*CALPHA + + RELPERM = ONE + KECONST = 14.3996437701414D0*RELPERM + TFACT = 16.0D0/(5.0D0*KECONST) + + FCOUL = ZERO + COULOMBV = ZERO + + TI = TFACT*U(I) + TI2 = TI*TI + TI3 = TI2*TI + TI4 = TI2*TI2 + TI6 = TI4*TI2 + + SSA = TI + SSB = TI3/48.D0 + SSC = 3.D0*TI2/16.D0 + SSD = 11.D0*TI/16.D0 + SSE = 1.D0 + + Ra(1) = RX(I) + Ra(2) = RY(I) + Ra(3) = RZ(I) + + do nnI = 1,nrnnlist(I) + Rb(1) = nnRx(I,nnI) + Rb(2) = nnRy(I,nnI) + Rb(3) = nnRz(I,nnI) + J = nnType(I,nnI) + Rab = Rb-Ra ! OBS b - a !!! + dR = norm2(Rab) + MAGR = dR + MAGR2 = dR*dR + + if ((dR <= COULCUT).and.(dR > 1e-12)) then + + TJ = TFACT*U(J) + DC = Rab/dR + + ! Not Using Numerical Recipes ERFC + Z = abs(CALPHA*MAGR) + NUMREP_ERFC = erfc(Z) + + CA = NUMREP_ERFC/MAGR + COULOMBV = COULOMBV + DELTAQ(J)*CA + ccnt = ccnt + 1 + !TEST(ccnt) = DELTAQ(J)*CA + CA = CA + TWO*CALPHA*exp( -CALPHA2*MAGR2 )/SQRTPI + FORCE = -KECONST*DELTAQ(I)*DELTAQ(J)*CA/MAGR + EXPTI = exp(-TI*MAGR ) + + if (Element_Type(I).eq.Element_Type(J)) then + COULOMBV = COULOMBV - DELTAQ(J)*EXPTI*(SSB*MAGR2 + SSC*MAGR + SSD + SSE/MAGR) + ccnt = ccnt + 1 + !TEST(ccnt) = - DELTAQ(J)*EXPTI*(SSB*MAGR2 + SSC*MAGR + SSD + SSE/MAGR) + FORCE = FORCE + (KECONST*DELTAQ(I)*DELTAQ(J)*EXPTI)*((SSE/MAGR2 - TWO*SSB*MAGR - SSC) & + + SSA*(SSB*MAGR2 + SSC*MAGR + SSD + SSE/MAGR)) + else + TJ2 = TJ*TJ + TJ3 = TJ2*TJ + TJ4 = TJ2*TJ2 + TJ6 = TJ4*TJ2 + EXPTJ = exp( -TJ*MAGR ) + TI2MTJ2 = TI2 - TJ2 + TJ2MTI2 = -TI2MTJ2 + SA = TI + SB = TJ4*TI/(TWO*TI2MTJ2*TI2MTJ2) + SC = (TJ6 - THREE*TJ4*TI2)/(TI2MTJ2*TI2MTJ2*TI2MTJ2) + SD = TJ + SE = TI4*TJ/(TWO * TJ2MTI2 * TJ2MTI2) + SF = (TI6 - THREE*TI4*TJ2)/(TJ2MTI2*TJ2MTI2*TJ2MTI2) + + COULOMBV = COULOMBV - (DELTAQ(J)*(EXPTI*(SB - (SC/MAGR)) + EXPTJ*(SE - (SF/MAGR)))) + FORCE = FORCE + KECONST*DELTAQ(I)*DELTAQ(J)*((EXPTI*(SA*(SB - (SC/MAGR)) - (SC/MAGR2))) & + + (EXPTJ*(SD*(SE - (SF/MAGR)) - (SF/MAGR2)))) + endif -COULOMBV = KECONST*COULOMBV - -end subroutine Ewald_Real_Space_Single - -subroutine Ewald_Real_Space_latte(COULOMBV,I,RXYZ,Box, & - DELTAQ,U,Element_Pointer,Nr_atoms,COULACC,nebcoul,totnebcoul,HDIM,Max_Nr_Neigh,Nr_Elem) - -implicit none - -integer, parameter :: PREC = 8 -real(PREC), parameter :: ONE = 1.D0, TWO = 2.D0, ZERO = 0.D0, SIX = 6.D0, THREE = 3.D0 -real(PREC), parameter :: FOURTYEIGHT = 48.D0, ELEVEN = 11.D0, SIXTEEN = 16.D0 -real(PREC), parameter :: pi = 3.14159265358979323846264D0 -real(PREC), parameter :: SQRTPI = 1.772453850905516D0 -integer, intent(in) :: Nr_atoms, HDIM, Max_Nr_Neigh, I, Nr_Elem -real(PREC), intent(in) :: COULACC -real(PREC) :: TFACT, RELPERM, KECONST -real(PREC), intent(in) :: RXYZ(3,Nr_atoms), Box(3,3), DELTAQ(Nr_atoms) -real(PREC), intent(in) :: U(Nr_elem) -real(PREC) :: COULCUT, COULCUT2 -integer, intent(in) :: Element_Pointer(Nr_atoms) -integer, intent(in) :: totnebcoul(Nr_atoms), nebcoul(4,Max_Nr_Neigh,Nr_atoms) -real(PREC), intent(out) :: COULOMBV -real(PREC) :: Ra(3), Rb(3), dR, Rab(3) -real(PREC) :: COULVOL, SQRTX, CALPHA, DC(3), MAGR, MAGR2, Z -real(PREC) :: CALPHA2, TI,TI2,TI3,TI4,TI6,SSA,SSB,SSC,SSD,SSE -real(PREC) :: NUMREP_ERFC, CA, FORCE, EXPTI, EXPTJ -real(PREC) :: TJ,TJ2,TJ3,TJ4,TJ6,TI2MTJ2A,SA,SB,SC,SD,SE,SF -real(PREC) :: TI2MTJ2, TI2MTI2, TJ2MTI2 -integer :: J,K, ccnt, newj, PBCI,PBCJ,PBCK - -COULVOL = Box(1,1)*Box(2,2)*Box(3,3) -SQRTX = sqrt(-log(COULACC)) - -ccnt = 0 -COULCUT = 12.D0 -CALPHA = SQRTX/COULCUT -COULCUT2 = COULCUT*COULCUT -CALPHA2 = CALPHA*CALPHA - -RELPERM = ONE -KECONST = 14.3996437701414D0*RELPERM -TFACT = 16.0D0/(5.0D0*KECONST) - -COULOMBV = ZERO - -TI = TFACT*U(Element_Pointer(I)) -TI2 = TI*TI -TI3 = TI2*TI -TI4 = TI2*TI2 -TI6 = TI4*TI2 - -SSA = TI -SSB = TI3/48.D0 -SSC = 3.D0*TI2/16.D0 -SSD = 11.D0*TI/16.D0 -SSE = 1.D0 - -Ra(1) = RXYZ(1,I) -Ra(2) = RXYZ(2,I) -Ra(3) = RXYZ(3,I) - -do newj = 1,totnebcoul(I) - J = NEBCOUL(1, NEWJ, I) - PBCI = NEBCOUL(2, NEWJ, I) - PBCJ = NEBCOUL(3, NEWJ, I) - PBCK = NEBCOUL(4, NEWJ, I) - Rb(1) = RXYZ(1,J) + REAL(PBCI)*BOX(1,1) + REAL(PBCJ)*BOX(2,1) + & - REAL(PBCK)*BOX(3,1) - - Rb(2) = RXYZ(2,J) + REAL(PBCI)*BOX(1,2) + REAL(PBCJ)*BOX(2,2) + & - REAL(PBCK)*BOX(3,2) - - Rb(3) = RXYZ(3,J) + REAL(PBCI)*BOX(1,3) + REAL(PBCJ)*BOX(2,3) + & - REAL(PBCK)*BOX(3,3) - Rab = Rb-Ra ! OBS b - a !!! - dR = norm2(Rab) - MAGR = dR - MAGR2 = dR*dR - - if ((dR <= COULCUT).and.(dR > 1e-12)) then - - TJ = TFACT*U(Element_Pointer(J)) - DC = Rab/dR - - Z = abs(CALPHA*MAGR) - NUMREP_ERFC = erfc(Z) - - CA = NUMREP_ERFC/MAGR - COULOMBV = COULOMBV + DELTAQ(J)*CA - ccnt = ccnt + 1 - CA = CA + TWO*CALPHA*exp( -CALPHA2*MAGR2 )/SQRTPI - EXPTI = exp(-TI*MAGR ) - - if (Element_Pointer(I).eq.Element_Pointer(J)) then - COULOMBV = COULOMBV - DELTAQ(J)*EXPTI*(SSB*MAGR2 + SSC*MAGR + SSD + SSE/MAGR) - ccnt = ccnt + 1 - else - TJ2 = TJ*TJ - TJ3 = TJ2*TJ - TJ4 = TJ2*TJ2 - TJ6 = TJ4*TJ2 - EXPTJ = exp( -TJ*MAGR ) - TI2MTJ2 = TI2 - TJ2 - TJ2MTI2 = -TI2MTJ2 - SA = TI - SB = TJ4*TI/(TWO*TI2MTJ2*TI2MTJ2) - SC = (TJ6 - THREE*TJ4*TI2)/(TI2MTJ2*TI2MTJ2*TI2MTJ2) - SD = TJ - SE = TI4*TJ/(TWO * TJ2MTI2 * TJ2MTI2) - SF = (TI6 - THREE*TI4*TJ2)/(TJ2MTI2*TJ2MTI2*TJ2MTI2) - - COULOMBV = COULOMBV - (DELTAQ(J)*(EXPTI*(SB - (SC/MAGR)) + EXPTJ*(SE - (SF/MAGR)))) - endif - - endif -enddo -COULOMBV = KECONST*COULOMBV - -end subroutine Ewald_Real_Space_latte - -subroutine Ewald_Real_Space(COULOMBV,FCOUL,I,RX,RY,RZ,LBox, & - DELTAQ,U,Element_Type,Nr_atoms,COULACC,TIMERATIO,nnRx,nnRy,nnRz,nrnnlist,nnType,HDIM,Max_Nr_Neigh) - -implicit none - -integer, parameter :: PREC = 8 -real(PREC), parameter :: ONE = 1.D0, TWO = 2.D0, ZERO = 0.D0, SIX = 6.D0, THREE = 3.D0 -real(PREC), parameter :: FOURTYEIGHT = 48.D0, ELEVEN = 11.D0, SIXTEEN = 16.D0 -real(PREC), parameter :: pi = 3.14159265358979323846264D0 -real(PREC), parameter :: SQRTPI = 1.772453850905516D0 -integer, intent(in) :: Nr_atoms, HDIM, Max_Nr_Neigh, I -real(PREC), intent(in) :: COULACC, TIMERATIO -real(PREC) :: TFACT, RELPERM, KECONST -real(PREC), intent(in) :: RX(Nr_atoms), RY(Nr_atoms), RZ(Nr_atoms), LBox(3), DELTAQ(Nr_atoms) -real(PREC), intent(in) :: U(Nr_atoms) -real(PREC) :: COULCUT, COULCUT2 -character(10), intent(in) :: Element_Type(Nr_atoms) -integer, intent(in) :: nrnnlist(Nr_atoms), nnType(Nr_atoms,Max_Nr_Neigh) -real(PREC), intent(in) :: nnRx(Nr_atoms,Max_Nr_Neigh) -real(PREC), intent(in) :: nnRy(Nr_atoms,Max_Nr_Neigh), nnRz(Nr_atoms,Max_Nr_Neigh) -real(PREC), intent(out) :: COULOMBV, FCOUL(3) -real(PREC) :: Ra(3), Rb(3), dR, Rab(3) -real(PREC) :: COULVOL, SQRTX, CALPHA, DC(3), MAGR, MAGR2, Z -real(PREC) :: CALPHA2, TI,TI2,TI3,TI4,TI6,SSA,SSB,SSC,SSD,SSE -real(PREC) :: NUMREP_ERFC, CA, FORCE, EXPTI, EXPTJ -real(PREC) :: TJ,TJ2,TJ3,TJ4,TJ6,TI2MTJ2A,SA,SB,SC,SD,SE,SF -real(PREC) :: TI2MTJ2, TI2MTI2, TJ2MTI2 -integer :: J,K, ccnt, nnI - -COULVOL = LBox(1)*LBox(2)*LBox(3) -SQRTX = sqrt(-log(COULACC)) - -ccnt = 0 -COULCUT = 12.D0 -CALPHA = SQRTX/COULCUT -COULCUT2 = COULCUT*COULCUT -CALPHA2 = CALPHA*CALPHA - -RELPERM = ONE -KECONST = 14.3996437701414D0*RELPERM -TFACT = 16.0D0/(5.0D0*KECONST) - -FCOUL = ZERO -COULOMBV = ZERO - -TI = TFACT*U(I) -TI2 = TI*TI -TI3 = TI2*TI -TI4 = TI2*TI2 -TI6 = TI4*TI2 - -SSA = TI -SSB = TI3/48.D0 -SSC = 3.D0*TI2/16.D0 -SSD = 11.D0*TI/16.D0 -SSE = 1.D0 - -Ra(1) = RX(I) -Ra(2) = RY(I) -Ra(3) = RZ(I) - -do nnI = 1,nrnnlist(I) - Rb(1) = nnRx(I,nnI) - Rb(2) = nnRy(I,nnI) - Rb(3) = nnRz(I,nnI) - J = nnType(I,nnI) - Rab = Rb-Ra ! OBS b - a !!! - dR = norm2(Rab) - MAGR = dR - MAGR2 = dR*dR - - if ((dR <= COULCUT).and.(dR > 1e-12)) then - - TJ = TFACT*U(J) - DC = Rab/dR - - ! Not Using Numerical Recipes ERFC - Z = abs(CALPHA*MAGR) - NUMREP_ERFC = erfc(Z) - - CA = NUMREP_ERFC/MAGR - COULOMBV = COULOMBV + DELTAQ(J)*CA - ccnt = ccnt + 1 - !TEST(ccnt) = DELTAQ(J)*CA - CA = CA + TWO*CALPHA*exp( -CALPHA2*MAGR2 )/SQRTPI - FORCE = -KECONST*DELTAQ(I)*DELTAQ(J)*CA/MAGR - EXPTI = exp(-TI*MAGR ) - - if (Element_Type(I).eq.Element_Type(J)) then - COULOMBV = COULOMBV - DELTAQ(J)*EXPTI*(SSB*MAGR2 + SSC*MAGR + SSD + SSE/MAGR) - ccnt = ccnt + 1 - !TEST(ccnt) = - DELTAQ(J)*EXPTI*(SSB*MAGR2 + SSC*MAGR + SSD + SSE/MAGR) - FORCE = FORCE + (KECONST*DELTAQ(I)*DELTAQ(J)*EXPTI)*((SSE/MAGR2 - TWO*SSB*MAGR - SSC) & - + SSA*(SSB*MAGR2 + SSC*MAGR + SSD + SSE/MAGR)) - else - TJ2 = TJ*TJ - TJ3 = TJ2*TJ - TJ4 = TJ2*TJ2 - TJ6 = TJ4*TJ2 - EXPTJ = exp( -TJ*MAGR ) - TI2MTJ2 = TI2 - TJ2 - TJ2MTI2 = -TI2MTJ2 - SA = TI - SB = TJ4*TI/(TWO*TI2MTJ2*TI2MTJ2) - SC = (TJ6 - THREE*TJ4*TI2)/(TI2MTJ2*TI2MTJ2*TI2MTJ2) - SD = TJ - SE = TI4*TJ/(TWO * TJ2MTI2 * TJ2MTI2) - SF = (TI6 - THREE*TI4*TJ2)/(TJ2MTI2*TJ2MTI2*TJ2MTI2) - - COULOMBV = COULOMBV - (DELTAQ(J)*(EXPTI*(SB - (SC/MAGR)) + EXPTJ*(SE - (SF/MAGR)))) - FORCE = FORCE + KECONST*DELTAQ(I)*DELTAQ(J)*((EXPTI*(SA*(SB - (SC/MAGR)) - (SC/MAGR2))) & - + (EXPTJ*(SD*(SE - (SF/MAGR)) - (SF/MAGR2)))) - endif - - FCOUL(1) = FCOUL(1) + DC(1)*FORCE - FCOUL(2) = FCOUL(2) + DC(2)*FORCE - FCOUL(3) = FCOUL(3) + DC(3)*FORCE - endif -enddo -COULOMBV = KECONST*COULOMBV - -end subroutine Ewald_Real_Space - -subroutine Ewald_k_Space_latte(COULOMBV,RXYZ,Box,DELTAQ,Nr_atoms,COULACC,Max_Nr_Neigh) - -implicit none - -integer, parameter :: PREC = 8 -real(PREC), parameter :: ONE = 1.D0, TWO = 2.D0, ZERO = 0.D0, SIX = 6.D0, THREE = 3.D0, FOUR = 4.D0 -real(PREC), parameter :: FOURTYEIGHT = 48.D0, ELEVEN = 11.D0, SIXTEEN = 16.D0, EIGHT = 8.D0 -real(PREC), parameter :: pi = 3.14159265358979323846264D0 -real(PREC), parameter :: SQRTPI = 1.772453850905516D0 -integer, intent(in) :: Nr_atoms, Max_Nr_Neigh -real(PREC), intent(in) :: COULACC -real(PREC) :: KECONST, TFACT, RELPERM -real(PREC), intent(in) :: RXYZ(3,Nr_atoms), Box(3,3), DELTAQ(Nr_atoms) -real(PREC) :: COULCUT, COULCUT2 -real(PREC), intent(out) :: COULOMBV(Nr_atoms) -real(PREC) :: Ra(3), Rb(3), dR, Rab(3) -real(PREC) :: COULVOL, SQRTX, CALPHA, DC(3), MAGR, MAGR2, Z -real(PREC) :: CALPHA2, TI,TI2,TI3,TI4,TI6,SSA,SSB,SSC,SSD,SSE -real(PREC) :: CORRFACT,FOURCALPHA2, FORCE -real(PREC) :: RECIPVECS(3,3),SINLIST(Nr_atoms),COSLIST(Nr_Atoms) -real(PREC) :: K(3),L11,L12,L13,M21,M22,M23,K2,KCUTOFF,KCUTOFF2,PREFACTOR -real(PREC) :: PREVIR, COSSUM,SINSUM,DOT, KEPREF, COSSUM2, SINSUM2 - -integer :: I,J,L,M,N, ccnt, nnI, LMAX,MMAX,NMAX,NMIN,MMIN - -COULVOL = Box(1,1)*Box(2,2)*Box(3,3) -SQRTX = sqrt(-log(COULACC)) - -ccnt = 0 - -COULCUT = 12.0D0 -CALPHA = SQRTX/COULCUT - -COULCUT2 = COULCUT*COULCUT -KCUTOFF = TWO*CALPHA*SQRTX -KCUTOFF2 = KCUTOFF*KCUTOFF -CALPHA2 = CALPHA*CALPHA -FOURCALPHA2 = FOUR*CALPHA2 - -RECIPVECS = ZERO -RECIPVECS(1,1) = TWO*pi/Box(1,1) -RECIPVECS(2,2) = TWO*pi/Box(2,2) -RECIPVECS(3,3) = TWO*pi/Box(3,3) -LMAX = floor(KCUTOFF / sqrt(RECIPVECS(1,1)*RECIPVECS(1,1))) -MMAX = floor(KCUTOFF / sqrt(RECIPVECS(2,2)*RECIPVECS(2,2))) -NMAX = floor(KCUTOFF / sqrt(RECIPVECS(3,3)*RECIPVECS(3,3))) - -RELPERM = 1.D0 -KECONST = 14.3996437701414D0*RELPERM - -COULOMBV = ZERO -SINLIST = ZERO -COSLIST = ZERO - -do L = 0,LMAX - - if (L.eq.0) then - MMIN = 0 - else - MMIN = -MMAX - endif - - L11 = L*RECIPVECS(1,1) - L12 = L*RECIPVECS(1,2) - L13 = L*RECIPVECS(1,3) - - do M = MMIN,MMAX + FCOUL(1) = FCOUL(1) + DC(1)*FORCE + FCOUL(2) = FCOUL(2) + DC(2)*FORCE + FCOUL(3) = FCOUL(3) + DC(3)*FORCE + endif + enddo + COULOMBV = KECONST*COULOMBV + + end subroutine Ewald_Real_Space + + subroutine Ewald_k_Space_latte(COULOMBV,RXYZ,Box,DELTAQ,Nr_atoms,COULACC,Max_Nr_Neigh) + + implicit none + + integer, parameter :: PREC = 8 + real(PREC), parameter :: ONE = 1.D0, TWO = 2.D0, ZERO = 0.D0, SIX = 6.D0, THREE = 3.D0, FOUR = 4.D0 + real(PREC), parameter :: FOURTYEIGHT = 48.D0, ELEVEN = 11.D0, SIXTEEN = 16.D0, EIGHT = 8.D0 + real(PREC), parameter :: pi = 3.14159265358979323846264D0 + real(PREC), parameter :: SQRTPI = 1.772453850905516D0 + integer, intent(in) :: Nr_atoms, Max_Nr_Neigh + real(PREC), intent(in) :: COULACC + real(PREC) :: KECONST, TFACT, RELPERM + real(PREC), intent(in) :: RXYZ(3,Nr_atoms), Box(3,3), DELTAQ(Nr_atoms) + real(PREC) :: COULCUT, COULCUT2 + real(PREC), intent(out) :: COULOMBV(Nr_atoms) + real(PREC) :: Ra(3), Rb(3), dR, Rab(3) + real(PREC) :: COULVOL, SQRTX, CALPHA, DC(3), MAGR, MAGR2, Z + real(PREC) :: CALPHA2, TI,TI2,TI3,TI4,TI6,SSA,SSB,SSC,SSD,SSE + real(PREC) :: CORRFACT,FOURCALPHA2, FORCE + real(PREC) :: RECIPVECS(3,3),SINLIST(Nr_atoms),COSLIST(Nr_Atoms) + real(PREC) :: K(3),L11,L12,L13,M21,M22,M23,K2,KCUTOFF,KCUTOFF2,PREFACTOR + real(PREC) :: PREVIR, COSSUM,SINSUM,DOT, KEPREF, COSSUM2, SINSUM2 + + integer :: I,J,L,M,N, ccnt, nnI, LMAX,MMAX,NMAX,NMIN,MMIN + + COULVOL = Box(1,1)*Box(2,2)*Box(3,3) + SQRTX = sqrt(-log(COULACC)) + + ccnt = 0 + + COULCUT = 12.0D0 + CALPHA = SQRTX/COULCUT + + COULCUT2 = COULCUT*COULCUT + KCUTOFF = TWO*CALPHA*SQRTX + KCUTOFF2 = KCUTOFF*KCUTOFF + CALPHA2 = CALPHA*CALPHA + FOURCALPHA2 = FOUR*CALPHA2 + + RECIPVECS = ZERO + RECIPVECS(1,1) = TWO*pi/Box(1,1) + RECIPVECS(2,2) = TWO*pi/Box(2,2) + RECIPVECS(3,3) = TWO*pi/Box(3,3) + LMAX = floor(KCUTOFF / sqrt(RECIPVECS(1,1)*RECIPVECS(1,1))) + MMAX = floor(KCUTOFF / sqrt(RECIPVECS(2,2)*RECIPVECS(2,2))) + NMAX = floor(KCUTOFF / sqrt(RECIPVECS(3,3)*RECIPVECS(3,3))) + + RELPERM = 1.D0 + KECONST = 14.3996437701414D0*RELPERM + + COULOMBV = ZERO + SINLIST = ZERO + COSLIST = ZERO + + do L = 0,LMAX + + if (L.eq.0) then + MMIN = 0 + else + MMIN = -MMAX + endif + + L11 = L*RECIPVECS(1,1) + L12 = L*RECIPVECS(1,2) + L13 = L*RECIPVECS(1,3) + + do M = MMIN,MMAX NMIN = -NMAX if ((L==0).and.(M==0)) then @@ -575,122 +575,122 @@ subroutine Ewald_k_Space_latte(COULOMBV,RXYZ,Box,DELTAQ,Nr_atoms,COULACC,Max_Nr_ M23 = L13 + M*RECIPVECS(2,3) do N = NMIN,NMAX - K(1) = M21 + N*RECIPVECS(3,1) - K(2) = M22 + N*RECIPVECS(3,2) - K(3) = M23 + N*RECIPVECS(3,3) - K2 = K(1)*K(1) + K(2)*K(2) + K(3)*K(3) - if (K2.le.KCUTOFF2) then - PREFACTOR = EIGHT*pi*exp(-K2/(4.D0*CALPHA2))/(COULVOL*K2) - PREVIR = (2.D0/K2) + (2.D0/(4.D0*CALPHA2)); - - COSSUM = 0.D0 - SINSUM = 0.D0 - - ! Doing the sin and cos sums - !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(I,DOT) & - !$OMP REDUCTION(+:COSSUM) & - !$OMP REDUCTION(+:SINSUM) - do I = 1,Nr_atoms - DOT = K(1)*RXYZ(1,I) + K(2)*RXYZ(2,I) + K(3)*RXYZ(3,I) - ! We re-use these in the next loop... - SINLIST(I) = sin(DOT) - COSLIST(I) = cos(DOT) - COSSUM = COSSUM + DELTAQ(I)*COSLIST(I) - SINSUM = SINSUM + DELTAQ(I)*SINLIST(I) - enddo - !$OMP END PARALLEL DO - COSSUM2 = COSSUM*COSSUM - SINSUM2 = SINSUM*SINSUM - - ! Add up energy and force contributions - - KEPREF = KECONST*PREFACTOR - !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(I) - do I = 1,Nr_atoms - COULOMBV(I) = COULOMBV(I) + KEPREF*(COSLIST(I)*COSSUM + SINLIST(I)*SINSUM) - enddo - !$OMP END PARALLEL DO - - KEPREF = KEPREF*(COSSUM2 + SINSUM2) - endif + K(1) = M21 + N*RECIPVECS(3,1) + K(2) = M22 + N*RECIPVECS(3,2) + K(3) = M23 + N*RECIPVECS(3,3) + K2 = K(1)*K(1) + K(2)*K(2) + K(3)*K(3) + if (K2.le.KCUTOFF2) then + PREFACTOR = EIGHT*pi*exp(-K2/(4.D0*CALPHA2))/(COULVOL*K2) + PREVIR = (2.D0/K2) + (2.D0/(4.D0*CALPHA2)); + + COSSUM = 0.D0 + SINSUM = 0.D0 + + ! Doing the sin and cos sums + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(I,DOT) & + !$OMP REDUCTION(+:COSSUM) & + !$OMP REDUCTION(+:SINSUM) + do I = 1,Nr_atoms + DOT = K(1)*RXYZ(1,I) + K(2)*RXYZ(2,I) + K(3)*RXYZ(3,I) + ! We re-use these in the next loop... + SINLIST(I) = sin(DOT) + COSLIST(I) = cos(DOT) + COSSUM = COSSUM + DELTAQ(I)*COSLIST(I) + SINSUM = SINSUM + DELTAQ(I)*SINLIST(I) + enddo + !$OMP END PARALLEL DO + COSSUM2 = COSSUM*COSSUM + SINSUM2 = SINSUM*SINSUM + + ! Add up energy and force contributions + + KEPREF = KECONST*PREFACTOR + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(I) + do I = 1,Nr_atoms + COULOMBV(I) = COULOMBV(I) + KEPREF*(COSLIST(I)*COSSUM + SINLIST(I)*SINSUM) + enddo + !$OMP END PARALLEL DO + + KEPREF = KEPREF*(COSSUM2 + SINSUM2) + endif enddo - enddo -enddo - -! Point self energy -CORRFACT = 2.D0*KECONST*CALPHA/SQRTPI; -COULOMBV = COULOMBV - CORRFACT*DELTAQ; - -end subroutine Ewald_k_Space_latte - -subroutine Ewald_k_Space(COULOMBV,FCOUL,RX,RY,RZ,LBox,DELTAQ,Nr_atoms,COULACC,TIMERATIO,Max_Nr_Neigh) - -implicit none - -integer, parameter :: PREC = 8 -real(PREC), parameter :: ONE = 1.D0, TWO = 2.D0, ZERO = 0.D0, SIX = 6.D0, THREE = 3.D0, FOUR = 4.D0 -real(PREC), parameter :: FOURTYEIGHT = 48.D0, ELEVEN = 11.D0, SIXTEEN = 16.D0, EIGHT = 8.D0 -real(PREC), parameter :: pi = 3.14159265358979323846264D0 -real(PREC), parameter :: SQRTPI = 1.772453850905516D0 -integer, intent(in) :: Nr_atoms, Max_Nr_Neigh -real(PREC), intent(in) :: COULACC, TIMERATIO -real(PREC) :: KECONST, TFACT, RELPERM -real(PREC), intent(in) :: RX(Nr_atoms), RY(Nr_atoms), RZ(Nr_atoms), LBox(3), DELTAQ(Nr_atoms) -real(PREC) :: COULCUT, COULCUT2 -real(PREC), intent(out) :: COULOMBV(Nr_atoms), FCOUL(3,Nr_atoms) -real(PREC) :: Ra(3), Rb(3), dR, Rab(3) -real(PREC) :: COULVOL, SQRTX, CALPHA, DC(3), MAGR, MAGR2, Z -real(PREC) :: CALPHA2, TI,TI2,TI3,TI4,TI6,SSA,SSB,SSC,SSD,SSE -real(PREC) :: CORRFACT,FOURCALPHA2, FORCE -real(PREC) :: RECIPVECS(3,3),SINLIST(Nr_atoms),COSLIST(Nr_Atoms) -real(PREC) :: K(3),L11,L12,L13,M21,M22,M23,K2,KCUTOFF,KCUTOFF2,PREFACTOR -real(PREC) :: PREVIR, COSSUM,SINSUM,DOT, KEPREF, COSSUM2, SINSUM2 - -integer :: I,J,L,M,N, ccnt, nnI, LMAX,MMAX,NMAX,NMIN,MMIN - -COULVOL = LBox(1)*LBox(2)*LBox(3) -SQRTX = sqrt(-log(COULACC)) - -ccnt = 0 - -COULCUT = 12.0D0 -CALPHA = SQRTX/COULCUT - -COULCUT2 = COULCUT*COULCUT -KCUTOFF = TWO*CALPHA*SQRTX -KCUTOFF2 = KCUTOFF*KCUTOFF -CALPHA2 = CALPHA*CALPHA -FOURCALPHA2 = FOUR*CALPHA2 - -RECIPVECS = ZERO -RECIPVECS(1,1) = TWO*pi/LBox(1) -RECIPVECS(2,2) = TWO*pi/LBox(2) -RECIPVECS(3,3) = TWO*pi/LBox(3) -LMAX = floor(KCUTOFF / sqrt(RECIPVECS(1,1)*RECIPVECS(1,1))) -MMAX = floor(KCUTOFF / sqrt(RECIPVECS(2,2)*RECIPVECS(2,2))) -NMAX = floor(KCUTOFF / sqrt(RECIPVECS(3,3)*RECIPVECS(3,3))) - -RELPERM = 1.D0 -KECONST = 14.3996437701414D0*RELPERM - -FCOUL = ZERO -COULOMBV = ZERO -SINLIST = ZERO -COSLIST = ZERO - -do L = 0,LMAX - - if (L.eq.0) then - MMIN = 0 - else - MMIN = -MMAX - endif - - L11 = L*RECIPVECS(1,1) - L12 = L*RECIPVECS(1,2) - L13 = L*RECIPVECS(1,3) - - do M = MMIN,MMAX + enddo + enddo + + ! Point self energy + CORRFACT = 2.D0*KECONST*CALPHA/SQRTPI; + COULOMBV = COULOMBV - CORRFACT*DELTAQ; + + end subroutine Ewald_k_Space_latte + + subroutine Ewald_k_Space(COULOMBV,FCOUL,RX,RY,RZ,LBox,DELTAQ,Nr_atoms,COULACC,TIMERATIO,Max_Nr_Neigh) + + implicit none + + integer, parameter :: PREC = 8 + real(PREC), parameter :: ONE = 1.D0, TWO = 2.D0, ZERO = 0.D0, SIX = 6.D0, THREE = 3.D0, FOUR = 4.D0 + real(PREC), parameter :: FOURTYEIGHT = 48.D0, ELEVEN = 11.D0, SIXTEEN = 16.D0, EIGHT = 8.D0 + real(PREC), parameter :: pi = 3.14159265358979323846264D0 + real(PREC), parameter :: SQRTPI = 1.772453850905516D0 + integer, intent(in) :: Nr_atoms, Max_Nr_Neigh + real(PREC), intent(in) :: COULACC, TIMERATIO + real(PREC) :: KECONST, TFACT, RELPERM + real(PREC), intent(in) :: RX(Nr_atoms), RY(Nr_atoms), RZ(Nr_atoms), LBox(3), DELTAQ(Nr_atoms) + real(PREC) :: COULCUT, COULCUT2 + real(PREC), intent(out) :: COULOMBV(Nr_atoms), FCOUL(3,Nr_atoms) + real(PREC) :: Ra(3), Rb(3), dR, Rab(3) + real(PREC) :: COULVOL, SQRTX, CALPHA, DC(3), MAGR, MAGR2, Z + real(PREC) :: CALPHA2, TI,TI2,TI3,TI4,TI6,SSA,SSB,SSC,SSD,SSE + real(PREC) :: CORRFACT,FOURCALPHA2, FORCE + real(PREC) :: RECIPVECS(3,3),SINLIST(Nr_atoms),COSLIST(Nr_Atoms) + real(PREC) :: K(3),L11,L12,L13,M21,M22,M23,K2,KCUTOFF,KCUTOFF2,PREFACTOR + real(PREC) :: PREVIR, COSSUM,SINSUM,DOT, KEPREF, COSSUM2, SINSUM2 + + integer :: I,J,L,M,N, ccnt, nnI, LMAX,MMAX,NMAX,NMIN,MMIN + + COULVOL = LBox(1)*LBox(2)*LBox(3) + SQRTX = sqrt(-log(COULACC)) + + ccnt = 0 + + COULCUT = 12.0D0 + CALPHA = SQRTX/COULCUT + + COULCUT2 = COULCUT*COULCUT + KCUTOFF = TWO*CALPHA*SQRTX + KCUTOFF2 = KCUTOFF*KCUTOFF + CALPHA2 = CALPHA*CALPHA + FOURCALPHA2 = FOUR*CALPHA2 + + RECIPVECS = ZERO + RECIPVECS(1,1) = TWO*pi/LBox(1) + RECIPVECS(2,2) = TWO*pi/LBox(2) + RECIPVECS(3,3) = TWO*pi/LBox(3) + LMAX = floor(KCUTOFF / sqrt(RECIPVECS(1,1)*RECIPVECS(1,1))) + MMAX = floor(KCUTOFF / sqrt(RECIPVECS(2,2)*RECIPVECS(2,2))) + NMAX = floor(KCUTOFF / sqrt(RECIPVECS(3,3)*RECIPVECS(3,3))) + + RELPERM = 1.D0 + KECONST = 14.3996437701414D0*RELPERM + + FCOUL = ZERO + COULOMBV = ZERO + SINLIST = ZERO + COSLIST = ZERO + + do L = 0,LMAX + + if (L.eq.0) then + MMIN = 0 + else + MMIN = -MMAX + endif + + L11 = L*RECIPVECS(1,1) + L12 = L*RECIPVECS(1,2) + L13 = L*RECIPVECS(1,3) + + do M = MMIN,MMAX NMIN = -NMAX if ((L==0).and.(M==0)) then @@ -702,50 +702,50 @@ subroutine Ewald_k_Space(COULOMBV,FCOUL,RX,RY,RZ,LBox,DELTAQ,Nr_atoms,COULACC,TI M23 = L13 + M*RECIPVECS(2,3) do N = NMIN,NMAX - K(1) = M21 + N*RECIPVECS(3,1) - K(2) = M22 + N*RECIPVECS(3,2) - K(3) = M23 + N*RECIPVECS(3,3) - K2 = K(1)*K(1) + K(2)*K(2) + K(3)*K(3) - if (K2.le.KCUTOFF2) then - PREFACTOR = EIGHT*pi*exp(-K2/(4.D0*CALPHA2))/(COULVOL*K2) - PREVIR = (2.D0/K2) + (2.D0/(4.D0*CALPHA2)); - - COSSUM = 0.D0 - SINSUM = 0.D0 - - ! Doing the sin and cos sums - do I = 1,Nr_atoms - DOT = K(1)*RX(I) + K(2)*RY(I) + K(3)*RZ(I) - ! We re-use these in the next loop... - SINLIST(I) = sin(DOT) - COSLIST(I) = cos(DOT) - COSSUM = COSSUM + DELTAQ(I)*COSLIST(I) - SINSUM = SINSUM + DELTAQ(I)*SINLIST(I) - enddo - COSSUM2 = COSSUM*COSSUM - SINSUM2 = SINSUM*SINSUM - - ! Add up energy and force contributions - - KEPREF = KECONST*PREFACTOR - do I = 1,Nr_atoms - COULOMBV(I) = COULOMBV(I) + KEPREF*(COSLIST(I)*COSSUM + SINLIST(I)*SINSUM) - FORCE = KEPREF*DELTAQ(I)*(SINLIST(I)*COSSUM - COSLIST(I)*SINSUM) - FCOUL(1,I) = FCOUL(1,I) + FORCE*K(1) - FCOUL(2,I) = FCOUL(2,I) + FORCE*K(2) - FCOUL(3,I) = FCOUL(3,I) + FORCE*K(3) - enddo - - KEPREF = KEPREF*(COSSUM2 + SINSUM2) - endif + K(1) = M21 + N*RECIPVECS(3,1) + K(2) = M22 + N*RECIPVECS(3,2) + K(3) = M23 + N*RECIPVECS(3,3) + K2 = K(1)*K(1) + K(2)*K(2) + K(3)*K(3) + if (K2.le.KCUTOFF2) then + PREFACTOR = EIGHT*pi*exp(-K2/(4.D0*CALPHA2))/(COULVOL*K2) + PREVIR = (2.D0/K2) + (2.D0/(4.D0*CALPHA2)); + + COSSUM = 0.D0 + SINSUM = 0.D0 + + ! Doing the sin and cos sums + do I = 1,Nr_atoms + DOT = K(1)*RX(I) + K(2)*RY(I) + K(3)*RZ(I) + ! We re-use these in the next loop... + SINLIST(I) = sin(DOT) + COSLIST(I) = cos(DOT) + COSSUM = COSSUM + DELTAQ(I)*COSLIST(I) + SINSUM = SINSUM + DELTAQ(I)*SINLIST(I) + enddo + COSSUM2 = COSSUM*COSSUM + SINSUM2 = SINSUM*SINSUM + + ! Add up energy and force contributions + + KEPREF = KECONST*PREFACTOR + do I = 1,Nr_atoms + COULOMBV(I) = COULOMBV(I) + KEPREF*(COSLIST(I)*COSSUM + SINLIST(I)*SINSUM) + FORCE = KEPREF*DELTAQ(I)*(SINLIST(I)*COSSUM - COSLIST(I)*SINSUM) + FCOUL(1,I) = FCOUL(1,I) + FORCE*K(1) + FCOUL(2,I) = FCOUL(2,I) + FORCE*K(2) + FCOUL(3,I) = FCOUL(3,I) + FORCE*K(3) + enddo + + KEPREF = KEPREF*(COSSUM2 + SINSUM2) + endif enddo - enddo -enddo + enddo + enddo -! Point self energy -CORRFACT = 2.D0*KECONST*CALPHA/SQRTPI; -COULOMBV = COULOMBV - CORRFACT*DELTAQ; + ! Point self energy + CORRFACT = 2.D0*KECONST*CALPHA/SQRTPI; + COULOMBV = COULOMBV - CORRFACT*DELTAQ; -end subroutine Ewald_k_Space + end subroutine Ewald_k_Space end module prg_ewald_mod diff --git a/src/prg_implicit_fermi_mod.F90 b/src/prg_implicit_fermi_mod.F90 index 2e217a98..76bd2bc2 100644 --- a/src/prg_implicit_fermi_mod.F90 +++ b/src/prg_implicit_fermi_mod.F90 @@ -30,7 +30,7 @@ module prg_implicit_fermi_mod contains !> Recursive Implicit Fermi Dirac for finite temperature. - !! \param Inv_bml Inverses generated by algorithm. + !! \param Inv_bml Inverses generated by algorithm. !! \param h_bml Input Hamiltonian matrix. !! \param p_bml Output density matrix. !! \param nsteps Number of recursion steps. @@ -40,8 +40,8 @@ module prg_implicit_fermi_mod !! \param occErrLimit Occupation error limit. !! \param threshold Threshold for multiplication. !! \param tol Tolerance for linear system solver. - !! \param SCF_IT The current SCF iteration. - !! \param occiter Counts the total nr of DM calculations during MD. + !! \param SCF_IT The current SCF iteration. + !! \param occiter Counts the total nr of DM calculations during MD. !! See \cite{niklasson2003} subroutine prg_implicit_fermi_save_inverse(Inv_bml, h_bml, p_bml, nsteps, nocc, & mu, beta, occErrLimit, threshold, tol,SCF_IT, occiter) @@ -58,8 +58,8 @@ subroutine prg_implicit_fermi_save_inverse(Inv_bml, h_bml, p_bml, nsteps, nocc, integer, intent(inout) :: occiter type(bml_matrix_t) :: w_bml, y_bml, d_bml, aux_bml, p2_bml, I_bml, ai_bml - real(dp) :: trdPdmu, trP0, occErr, alpha - real(dp) :: cnst, ofactor, mustep + real(dp) :: trdPdmu, trP0, occErr, alpha + real(dp) :: cnst, ofactor, mustep real(dp), allocatable :: trace(:), gbnd(:) character(20) :: bml_type integer :: N, M, i, iter, muadj, prev @@ -75,7 +75,7 @@ subroutine prg_implicit_fermi_save_inverse(Inv_bml, h_bml, p_bml, nsteps, nocc, call bml_zero_matrix(bml_type, bml_element_real, dp, N, M, aux_bml) call bml_zero_matrix(bml_type, bml_element_real, dp, N, M, y_bml) call bml_identity_matrix(bml_type, bml_element_real, dp, N, M, I_bml) - call bml_identity_matrix(bml_type, bml_element_real, dp, N, M, ai_bml) + call bml_identity_matrix(bml_type, bml_element_real, dp, N, M, ai_bml) occErr = 10.0_dp alpha = 1.0_dp @@ -97,10 +97,10 @@ subroutine prg_implicit_fermi_save_inverse(Inv_bml, h_bml, p_bml, nsteps, nocc, call prg_conjgrad(y_bml, ai_bml, I_bml, aux_bml, d_bml, w_bml, 0.0001_dp, threshold) else ! Otherwise use previous inverse as starting guess - call bml_copy(Inv_bml(1),ai_bml) + call bml_copy(Inv_bml(1),ai_bml) end if - do while (occErr .gt. occErrLimit .or. muadj .eq. 1) + do while ((occErr .gt. occErrLimit .or. muadj .eq. 1) .and. iter < 50) iter = iter + 1 muadj = 0 write(*,*) 'mu =', mu @@ -109,18 +109,18 @@ subroutine prg_implicit_fermi_save_inverse(Inv_bml, h_bml, p_bml, nsteps, nocc, call bml_copy(h_bml, p_bml) call prg_normalize_implicit_fermi(p_bml, cnst, mu) - do i = 1, nsteps - call bml_multiply_x2(p_bml, p2_bml, threshold, trace) - ! Y = 2*(P2-P) + I - call bml_copy(p2_bml, y_bml) - call bml_add(y_bml, p_bml, 1.0_dp, -1.0_dp, threshold) - call bml_scale_add_identity(y_bml, 2.0_dp, 1.0_dp, threshold) - ! Find inverse ai = (2*(P2-P)+I)^-1 - !call prg_conjgrad(y_bml, ai_bml, I_bml, aux_bml, d_bml, w_bml, 0.01_dp, threshold) - call prg_newtonschulz(y_bml, ai_bml, d_bml, w_bml, aux_bml, I_bml, tol, threshold) - call bml_multiply(ai_bml, p2_bml, p_bml, 1.0_dp, 0.0_dp, threshold) - call bml_copy(ai_bml, Inv_bml(i)) ! Save inverses for use in perturbation response calculation - enddo + do i = 1, nsteps + call bml_multiply_x2(p_bml, p2_bml, threshold, trace) + ! Y = 2*(P2-P) + I + call bml_copy(p2_bml, y_bml) + call bml_add(y_bml, p_bml, 1.0_dp, -1.0_dp, threshold) + call bml_scale_add_identity(y_bml, 2.0_dp, 1.0_dp, threshold) + ! Find inverse ai = (2*(P2-P)+I)^-1 + !call prg_conjgrad(y_bml, ai_bml, I_bml, aux_bml, d_bml, w_bml, 0.01_dp, threshold) + call prg_newtonschulz(y_bml, ai_bml, d_bml, w_bml, aux_bml, I_bml, tol, threshold) + call bml_multiply(ai_bml, p2_bml, p_bml, 1.0_dp, 0.0_dp, threshold) + call bml_copy(ai_bml, Inv_bml(i)) ! Save inverses for use in perturbation response calculation + enddo trdPdmu = bml_trace(p_bml) trP0 = trdPdmu @@ -129,40 +129,44 @@ subroutine prg_implicit_fermi_save_inverse(Inv_bml, h_bml, p_bml, nsteps, nocc, occErr = abs(trP0 - nocc) write(*,*) 'occerr =', nocc-trP0 - ! If occupation error is too large, do bisection method - if (occerr > 10.0_dp) then - if (nocc-trP0 < 0.0_dp) then - if (prev .eq. 1) then - alpha = alpha/2 - endif - prev = -1 - mu = mu - alpha - else - if (prev .eq. -1) then - alpha = alpha/2 - endif - prev = 1 - mu = mu + alpha - endif - ! Otherwise do Newton + ! If occupation error is too large, do bisection method + if (occerr > 10.0_dp) then + if (nocc-trP0 < 0.0_dp) then + if (prev .eq. 1) then + alpha = alpha/2 + endif + prev = -1 + mu = mu - alpha + else + if (prev .eq. -1) then + alpha = alpha/2 + endif + prev = 1 + mu = mu + alpha + endif + ! Otherwise do Newton else if (occErr .gt. occErrLimit) then mustep = (nocc -trP0)/trdPdmu if (abs(mustep) > 1.0) then mustep = 0.1_dp*mustep - end if - mu = mu + mustep + end if + mu = mu + mustep muadj = 1 end if enddo - ! Adjusting the occupation sometimes causes the perturbation calculation to not converge. - ! For now we recompute the DM one extra time if mu was adjusted. + + if (iter .ge. 50) then + write(*,*) 'Could not converge chemical potential in prg_impplicit_fermi_save_inverse' + end if + ! Adjusting the occupation sometimes causes the perturbation calculation to not converge. + ! For now we recompute the DM one extra time if mu was adjusted. !if (muadj .eq. 1) then - ! Adjust occupation - ! call bml_copy(p_bml, d_bml) - ! call bml_scale_add_identity(d_bml, -1.0_dp, 1.0_dp, threshold) - ! call bml_multiply(p_bml, d_bml, w_bml, 1.0_dp, 0.0_dp, threshold) - ! ofactor = ((nocc - trP0)/trdPdmu) * beta - ! call bml_add(p_bml, w_bml, 1.0_dp, ofactor, threshold) + ! Adjust occupation + ! call bml_copy(p_bml, d_bml) + ! call bml_scale_add_identity(d_bml, -1.0_dp, 1.0_dp, threshold) + ! call bml_multiply(p_bml, d_bml, w_bml, 1.0_dp, 0.0_dp, threshold) + ! ofactor = ((nocc - trP0)/trdPdmu) * beta + ! call bml_add(p_bml, w_bml, 1.0_dp, ofactor, threshold) !end if occiter = occiter + iter call bml_scale(2.0_dp,p_bml) @@ -177,7 +181,7 @@ subroutine prg_implicit_fermi_save_inverse(Inv_bml, h_bml, p_bml, nsteps, nocc, call bml_deallocate(I_bml) end subroutine prg_implicit_fermi_save_inverse - + !> Recursive Implicit Fermi Dirac for finite temperature. !! \param h_bml Input Hamiltonian matrix. !! \param p_bml Output density matrix. @@ -216,17 +220,12 @@ subroutine prg_implicit_fermi(h_bml, p_bml, nsteps, k, nocc, & N = bml_get_N(h_bml) M = bml_get_M(h_bml) - call bml_print_matrix("h_bml",h_bml,1,10,1,10) - call bml_print_matrix("p_bml",p_bml,1,10,1,10) - write(*,*) nsteps, k, nocc, & - mu, beta, method, osteps, occErrLimit, threshold, tol - !stop allocate(trace(2)) call bml_zero_matrix(bml_type, bml_element_real, dp, N, M, p2_bml) call bml_zero_matrix(bml_type, bml_element_real, dp, N, M, d_bml) call bml_zero_matrix(bml_type, bml_element_real, dp, N, M, w_bml) call bml_zero_matrix(bml_type, bml_element_real, dp, N, M, y_bml) - if (k .gt. 2) then + if (k .ge. 2) then call bml_zero_matrix(bml_type, bml_element_real, dp, N, M, aux1_bml) call bml_zero_matrix(bml_type, bml_element_real, dp, N, M, aux2_bml) endif @@ -292,8 +291,10 @@ subroutine prg_implicit_fermi(h_bml, p_bml, nsteps, k, nocc, & trP0 = trdPdmu trdPdmu = trdPdmu - bml_sum_squares(p_bml) ! sum p(i,j)**2 trdPdmu = beta * trdPdmu - mu = mu + (nocc - trP0)/trdPdmu occErr = abs(trP0 - nocc) + if (occErr .gt. occErrLimit) then + mu = mu + (nocc - trP0)/trdPdmu + end if write(*,*) "mu =", mu enddo @@ -304,7 +305,7 @@ subroutine prg_implicit_fermi(h_bml, p_bml, nsteps, k, nocc, & call bml_multiply(p_bml, d_bml, w_bml, 1.0_dp, 0.0_dp, threshold) ofactor = ((nocc - trP0)/trdPdmu) * beta - call bml_add(p_bml, w_bml, 1.0_dp, ofactor, threshold) + !call bml_add(p_bml, w_bml, 1.0_dp, ofactor, threshold) !call bml_print_matrix("P adjusted occupation",p_bml,0,10,0,10) deallocate(trace) @@ -313,7 +314,7 @@ subroutine prg_implicit_fermi(h_bml, p_bml, nsteps, k, nocc, & call bml_deallocate(w_bml) call bml_deallocate(d_bml) call bml_deallocate(y_bml) - if (k .gt. 2) then + if (k .ge. 2) then call bml_deallocate(aux1_bml) call bml_deallocate(aux2_bml) endif @@ -438,7 +439,7 @@ subroutine prg_implicit_fermi_first_order_response(H0_bml, H1_bml, P0_bml, P1_bm integer, intent(in) :: nsteps type(bml_matrix_t) :: B_bml, C_bml, C0_bml character(20) :: bml_type - real(dp) :: p1_trace, dPdmu_trace, p1B_trace, mu1B, cnst + real(dp) :: p1_trace, dPdmu_trace, p1B_trace, mu1B, cnst integer :: N, M, i, j, k bml_type = bml_get_type(H0_bml) @@ -451,46 +452,46 @@ subroutine prg_implicit_fermi_first_order_response(H0_bml, H1_bml, P0_bml, P1_bm cnst = beta/(2**(2+nsteps)) - ! P0 = 0.5*II - cnst*(H0-mu0*II) - call bml_copy(H0_bml, P0_bml) - call prg_normalize_implicit_fermi(P0_bml, cnst, mu0) - - ! P1 = - cnst*H1 - call bml_copy(H1_bml, P1_bml) - call bml_scale(-1.0_dp*cnst, P1_bml) - do i = 1, nsteps - - ! Calculate coefficient matrices - ! C0 = P0^2 - call bml_multiply(P0_bml, P0_bml, C0_bml, 1.0_dp, 0.0_dp, threshold) - ! C = P0*P1+P1*P0, B = 2(P1 - C) - call bml_multiply(P0_bml, P1_bml, C_bml, 1.0_dp, 0.0_dp, threshold) - call bml_multiply(P1_bml, P0_bml, C_bml, 1.0_dp, 1.0_dp, threshold) - call bml_copy(P1_bml, B_bml) - call bml_add(B_bml, C_bml, 2.0_dp, -2.0_dp, threshold) - ! Get next P0 - call bml_multiply(Inv_bml(i), C0_bml, P0_bml, 1.0_dp, 0.0_dp, threshold) - ! Get next P1 - ! C = P0*P1+P1*P0 + 2(P1 -P0*P1-P1*P0)*P0(i+1) - call bml_multiply(B_bml, P0_bml, C_bml, 1.0_dp, 1.0_dp, threshold) - call bml_multiply(Inv_bml(i), C_bml, P1_bml, 1.0_dp, 0.0_dp, threshold) - enddo + ! P0 = 0.5*II - cnst*(H0-mu0*II) + call bml_copy(H0_bml, P0_bml) + call prg_normalize_implicit_fermi(P0_bml, cnst, mu0) + + ! P1 = - cnst*H1 + call bml_copy(H1_bml, P1_bml) + call bml_scale(-1.0_dp*cnst, P1_bml) + do i = 1, nsteps + + ! Calculate coefficient matrices + ! C0 = P0^2 + call bml_multiply(P0_bml, P0_bml, C0_bml, 1.0_dp, 0.0_dp, threshold) + ! C = P0*P1+P1*P0, B = 2(P1 - C) + call bml_multiply(P0_bml, P1_bml, C_bml, 1.0_dp, 0.0_dp, threshold) + call bml_multiply(P1_bml, P0_bml, C_bml, 1.0_dp, 1.0_dp, threshold) + call bml_copy(P1_bml, B_bml) + call bml_add(B_bml, C_bml, 2.0_dp, -2.0_dp, threshold) + ! Get next P0 + call bml_multiply(Inv_bml(i), C0_bml, P0_bml, 1.0_dp, 0.0_dp, threshold) + ! Get next P1 + ! C = P0*P1+P1*P0 + 2(P1 -P0*P1-P1*P0)*P0(i+1) + call bml_multiply(B_bml, P0_bml, C_bml, 1.0_dp, 1.0_dp, threshold) + call bml_multiply(Inv_bml(i), C_bml, P1_bml, 1.0_dp, 0.0_dp, threshold) + enddo - ! dPdmu = beta*P0(I-P0) - call bml_copy(P0_bml, B_bml) - call bml_scale_add_identity(B_bml, -1.0_dp, 1.0_dp, threshold) - call bml_multiply(P0_bml, B_bml, C_bml, 1.0_dp, 0.0_dp, threshold) - call bml_scale(beta,C_bml) - dPdmu_trace = bml_trace(C_bml) - p1_trace = bml_trace(P1_bml) - mu1 = - p1_trace/dPdmu_trace - if (abs(dPdmu_trace) > 1e-8) then - call bml_add(P1_bml,C_bml,1.0_dp,mu1,threshold) - endif + ! dPdmu = beta*P0(I-P0) + call bml_copy(P0_bml, B_bml) + call bml_scale_add_identity(B_bml, -1.0_dp, 1.0_dp, threshold) + call bml_multiply(P0_bml, B_bml, C_bml, 1.0_dp, 0.0_dp, threshold) + call bml_scale(beta,C_bml) + dPdmu_trace = bml_trace(C_bml) + p1_trace = bml_trace(P1_bml) + mu1 = - p1_trace/dPdmu_trace + if (abs(dPdmu_trace) > 1e-8) then + call bml_add(P1_bml,C_bml,1.0_dp,mu1,threshold) + endif - call bml_deallocate(B_bml) - call bml_deallocate(C_bml) - call bml_deallocate(C0_bml) + call bml_deallocate(B_bml) + call bml_deallocate(C_bml) + call bml_deallocate(C0_bml) end subroutine prg_implicit_fermi_first_order_response @@ -513,7 +514,7 @@ end subroutine prg_implicit_fermi_first_order_response !! See \cite{niklasson2015} subroutine prg_implicit_fermi_response(H0_bml, H1_bml, H2_bml, H3_bml, P0_bml, P1_bml, P2_bml, P3_bml, & nsteps, mu0, mu, beta, nocc, occ_tol, lin_tol, order, threshold) - + implicit none type(bml_matrix_t), intent(in) :: H0_bml, H1_bml, H2_bml, H3_bml @@ -868,7 +869,8 @@ subroutine prg_newtonschulz(a_bml, ai_bml, r_bml, tmp_bml, d_bml, I_bml, tol, th N2 = N*N scaled_tol = tol*N do while(err > scaled_tol) - !write(*,*) 'iter = ', i + !write(*,*) 'iter = ', i + !write(*,*) 'ns error =', err call bml_copy(ai_bml, tmp_bml) call bml_multiply(a_bml, ai_bml, r_bml, 1.0_dp, 0.0_dp, threshold) call bml_scale_add_identity(r_bml, -1.0_dp, 1.0_dp, threshold) @@ -876,12 +878,12 @@ subroutine prg_newtonschulz(a_bml, ai_bml, r_bml, tmp_bml, d_bml, I_bml, tol, th err = bml_fnorm(r_bml) !write(*,*) "err = ", err !write(*,*) "prev_err = ", prev_err - if (10*prev_err < err) then + if (10*prev_err < err) then write(*,*) 'NS did not converge, calling conjugate gradient' call prg_conjgrad(a_bml, ai_bml, I_bml, r_bml, tmp_bml, d_bml, 0.0001_dp, threshold) - else + else call bml_multiply(tmp_bml, r_bml, ai_bml, 1.0_dp, 1.0_dp, threshold) - endif + endif i = i + 1 enddo !write(*,*) "Number of NS iterations:", i @@ -1004,7 +1006,7 @@ subroutine prg_conjgrad(A_bml, p_bml, p2_bml, tmp_bml, d_bml, w_bml, cg_tol, thr call bml_add(tmp_bml, w_bml, 1.0_dp, -alpha, threshold) r_norm_old = r_norm_new r_norm_new = bml_sum_squares(tmp_bml) - if (k .gt. 500) then + if (k .gt. 50) then write(*,*) "Conjugate gradient is not converging" stop endif @@ -1124,8 +1126,10 @@ subroutine prg_test_density_matrix(ham_bml, p_bml, beta, mu, nocc, osteps, occEr trP0 = trdPdmu trdPdmu = trdPdmu - bml_sum_squares(p_bml) ! sum p(i,j)**2 trdPdmu = beta * trdPdmu - mu = mu + (nocc - trP0)/trdPdmu occErr = abs(trP0 - nocc) + if (occErr .gt. occErrLimit) then + mu = mu + (nocc - trP0)/trdPdmu + end if !write(*,*) "mu = ", mu enddo @@ -1136,7 +1140,7 @@ subroutine prg_test_density_matrix(ham_bml, p_bml, beta, mu, nocc, osteps, occEr call bml_multiply(p_bml, aux_bml, aux1_bml, 1.0_dp, 0.0_dp, threshold) ofactor = ((nocc - trP0)/trdPdmu) * beta - call bml_add(p_bml, aux1_bml, 1.0_dp, ofactor, threshold) + !call bml_add(p_bml, aux1_bml, 1.0_dp, ofactor, threshold) !call bml_print_matrix("Diagonalization - Adjusted occupation",p_bml,0,10,0,10) call bml_deallocate(eigenvectors_bml) diff --git a/src/prg_xlbokernel_mod.F90 b/src/prg_xlbokernel_mod.F90 index 54f4ecf1..a19cb5b2 100644 --- a/src/prg_xlbokernel_mod.F90 +++ b/src/prg_xlbokernel_mod.F90 @@ -1,7 +1,7 @@ !> Pre-conditioned O(N) calculation of the kernel for XL-BOMD. !! \ingroup PROGRESS -!! \brief Here are subroutines implementing Niklasson's scheme for -!! low-rank, Krylov subspace approximation of the kernel. +!! \brief Here are subroutines implementing Niklasson's scheme for +!! low-rank, Krylov subspace approximation of the kernel. module prg_xlbokernel_mod use omp_lib @@ -11,7 +11,7 @@ module prg_xlbokernel_mod use prg_timer_mod use prg_parallel_mod use prg_ewald_mod - use prg_implicit_fermi_mod + use prg_implicit_fermi_mod implicit none @@ -27,809 +27,810 @@ module prg_xlbokernel_mod contains -subroutine Invert(A,AI,N) - -implicit none -integer, parameter :: PREC = 8 -integer, intent(in) :: N -real(PREC), intent(in) :: A(N,N) -real(PREC), intent(out) :: AI(N,N) -real(PREC) :: WORK(N+N*N)!, C(N,N) -integer :: LDA, LWORK, M, INFO, IPIV(N) -integer :: I,J,K - -external DGETRF -external DGETRI - -AI = A -LDA = N -M = N -LWORK = N+N*N - -call DGETRF(M, N, AI, LDA, IPIV, INFO) -call DGETRI(N, AI, N, IPIV, WORK, LWORK, INFO) - -end subroutine Invert - -!> Compute low rank approximation of (K0*J)^(-1)*K0*(q[n]-n)(for LATTE) -!! \param KRes The low rank approximation -!! \param KK0_bml The pre-conditioner K0. -!! \param Res The residual q[n]-n -!! \param FelTol Relative error tolerance for approximation -!! \param L Number of vectors used. -!! \param LMAX Maximum nr of vectors to use. -!! \param NUMRANK Nr of vectors to use. -!! \param HO_bml, Orthogonalized Hamiltonian matrix. -!! \param mu The chemical potiential. -!! \param beta Scaled inverse temperature. -!! \param RXYZ Nuclear coordinates. -!! \param Box Box dimensions. -!! \param Hubbard_U Hubbard U list. -!! \param Element_Pointer List to keep track of elements. -!! \param Nr_atoms The number of atoms. -!! \param HDIM Hamiltonian matrix dimension. -!! \param Max_Nr_Neigh Max neighbours for Ewald. -!! \param Coulomb_acc Coulomb accuracy. -!! \param nebcoul Neighbour lists. -!! \param totnebcoul Number of neighbours list. -!! \param Hinxlist List to keep track of atomic positions in the Hamiltonian. -!! \param S_bml The S matrix. -!! \param Z_bml, The Z matrix. -!! \param Nocc Occupation. -!! \param Inv_bml, Inverses generated by prg_implicit_fermi_save_inverse. -!! \param DO_bml, D1_bml, H1_bml, Y_bml, X_bml Auxillary matrices. -!! \param m_rec Number of recursion steps. -!! \param threshold Threshold value for matrix truncation. -!! \param Nr_elem Number of elements in Hubbard list. -subroutine prg_kernel_multirank_latte(KRes,KK0_bml,Res,FelTol,L,LMAX,NUMRANK,HO_bml,mu,beta,RXYZ,Box,Hubbard_U,Element_Pointer, & - Nr_atoms,HDIM,Max_Nr_Neigh,Coulomb_acc,nebcoul,totnebcoul,Hinxlist, & - S_bml,Z_bml,Nocc,Inv_bml,H1_bml,X_bml,Y_bml,DO_bml,D1_bml,m_rec,threshold,Nr_elem) - -!! Res = q[n] - n -!! KK0 is preconditioner -!! KRes rank-L approximation of (K0*J)^(-1)*K0*(q[n]-n) with (K0*J)^(-1) as in Eq. (41) in Ref. [*] + subroutine Invert(A,AI,N) + + implicit none + integer, parameter :: PREC = 8 + integer, intent(in) :: N + real(PREC), intent(in) :: A(N,N) + real(PREC), intent(out) :: AI(N,N) + real(PREC) :: WORK(N+N*N)!, C(N,N) + integer :: LDA, LWORK, M, INFO, IPIV(N) + integer :: I,J,K + + external DGETRF + external DGETRI + + AI = A + LDA = N + M = N + LWORK = N+N*N + + call DGETRF(M, N, AI, LDA, IPIV, INFO) + call DGETRI(N, AI, N, IPIV, WORK, LWORK, INFO) + + end subroutine Invert + + !> Compute low rank approximation of (K0*J)^(-1)*K0*(q[n]-n)(for LATTE) + !! \param KRes The low rank approximation + !! \param KK0_bml The pre-conditioner K0. + !! \param Res The residual q[n]-n + !! \param FelTol Relative error tolerance for approximation + !! \param L Number of vectors used. + !! \param LMAX Maximum nr of vectors to use. + !! \param NUMRANK Nr of vectors to use. + !! \param HO_bml, Orthogonalized Hamiltonian matrix. + !! \param mu The chemical potiential. + !! \param beta Scaled inverse temperature. + !! \param RXYZ Nuclear coordinates. + !! \param Box Box dimensions. + !! \param Hubbard_U Hubbard U list. + !! \param Element_Pointer List to keep track of elements. + !! \param Nr_atoms The number of atoms. + !! \param HDIM Hamiltonian matrix dimension. + !! \param Max_Nr_Neigh Max neighbours for Ewald. + !! \param Coulomb_acc Coulomb accuracy. + !! \param nebcoul Neighbour lists. + !! \param totnebcoul Number of neighbours list. + !! \param Hinxlist List to keep track of atomic positions in the Hamiltonian. + !! \param S_bml The S matrix. + !! \param Z_bml, The Z matrix. + !! \param Nocc Occupation. + !! \param Inv_bml, Inverses generated by prg_implicit_fermi_save_inverse. + !! \param DO_bml, D1_bml, H1_bml, Y_bml, X_bml Auxillary matrices. + !! \param m_rec Number of recursion steps. + !! \param threshold Threshold value for matrix truncation. + !! \param Nr_elem Number of elements in Hubbard list. + subroutine prg_kernel_multirank_latte(KRes,KK0_bml,Res,FelTol,L,LMAX,NUMRANK,HO_bml,mu,beta,RXYZ,Box,Hubbard_U,Element_Pointer, & + Nr_atoms,HDIM,Max_Nr_Neigh,Coulomb_acc,nebcoul,totnebcoul,Hinxlist, & + S_bml,Z_bml,Nocc,Inv_bml,H1_bml,X_bml,Y_bml,DO_bml,D1_bml,m_rec,threshold,Nr_elem) + + !! Res = q[n] - n + !! KK0 is preconditioner + !! KRes rank-L approximation of (K0*J)^(-1)*K0*(q[n]-n) with (K0*J)^(-1) as in Eq. (41) in Ref. [*] + + implicit none + integer, intent(in) :: Nr_atoms, HDIM, Nocc,Max_Nr_Neigh,m_rec,Nr_elem,NUMRANK + real(dp), parameter :: ONE = 1.D0, TWO = 2.D0, ZERO = 0.D0 + real(dp), intent(in) :: Coulomb_acc, FelTol, beta, mu + real(dp), intent(in) :: RXYZ(3,Nr_atoms), Box(3,3) + real(dp), intent(in) :: Res(Nr_atoms) + integer, intent(in) :: Hinxlist(HDIM),Element_Pointer(Nr_atoms) + real(dp), intent(in) :: Hubbard_U(Nr_elem) + type(bml_matrix_t), intent(inout) :: HO_bml, S_bml, Z_bml, Inv_bml(m_rec), KK0_bml + real(dp) :: K0Res(Nr_atoms) + type(bml_matrix_t),intent(inout) :: H1_bml, X_bml,Y_bml,DO_bml,D1_bml + real(dp), intent(in) :: threshold + integer, intent(in) :: LMAX + character(20) :: bml_type + integer, intent(in) :: totnebcoul(Nr_atoms), nebcoul(4,Max_Nr_Neigh,Nr_atoms) + real(dp) :: Coulomb_Pot_Real(Nr_atoms), Coulomb_Pot(Nr_atoms) + real(dp) :: Coulomb_Pot_Real_I, Coulomb_Pot_k(Nr_atoms),dq_dv(Nr_atoms) + real(dp) :: Coulomb_Pot_Real_dq_v(Nr_atoms), Coulomb_Pot_dq_v(Nr_atoms) + real(dp) :: dq_v(Nr_atoms) + real(dp), intent(inout) :: KRes(Nr_atoms) + integer :: I,J,K,It,N,MN + integer, intent(out) :: L + real(dp) :: Fel, proj_tmp, start, finish + real(dp) :: vi(Nr_atoms,LMAX), fi(Nr_atoms,LMAX), v(Nr_atoms) + real(dp) :: dr(Nr_Atoms), IdentRes(Nr_atoms) + real(dp), allocatable :: O(:,:),M(:,:),row1(:),row2(:),row_NA(:) + type(bml_matrix_t) :: KK0T_bml, K0Res_bml, Res_bml, ZT_bml, T_bml + + call timer_prg_init() + bml_type = bml_get_type(HO_bml) + N = bml_get_N(HO_bml) + MN = bml_get_M(HO_bml) + allocate(row1(HDIM)); allocate(row2(HDIM)); allocate(row_NA(Nr_atoms)) + call bml_zero_matrix('csr',bml_element_real,dp,Nr_atoms,Nr_atoms,KK0T_bml) + call bml_zero_matrix('csr',bml_element_real,dp,Nr_atoms,Nr_atoms,K0Res_bml) + call bml_zero_matrix('csr',bml_element_real,dp,Nr_atoms,Nr_atoms,Res_bml) + call bml_zero_matrix(bml_type,bml_element_real,dp,N,MN,ZT_bml) + + call bml_transpose(Z_bml,ZT_bml) + + ! K0Res = KK0*Res temporary for matrix-vector multiplication + call bml_set_row(Res_bml,1,Res,1.0_dp*1e-10) + call bml_transpose(KK0_bml,KK0T_bml) + call bml_multiply(Res_bml,KK0T_bml,K0Res_bml,1.0_dp,0.0_dp,1.0_dp*1e-10) + call bml_get_row(K0Res_bml,1,row_NA) + K0Res = row_NA + dr = K0Res + + I = 0 + Fel = 1.D0 + do while ((Fel > FelTol).AND.(I < (LMAX)).AND.(I < NUMRANK)) !! Fel = "Error" in Swedish + I = I + 1 + !write(*,*) 'dr =', norm2(dr) + vi(:,I) = dr/norm2(dr) + do J = 1,I-1 + vi(:,I) = vi(:,I) - dot_product(vi(:,I),vi(:,J))*vi(:,J) !! Orthogonalized v_i as in Eq. (42) Ref. [*] + enddo + vi(:,I) = vi(:,I)/norm2(vi(:,I)) + v(:) = vi(:,I) ! v_i + + ! Compute H1 = H(v) + dq_v = v + call prg_timer_start(1) + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(J,Coulomb_Pot_Real_I) + do J = 1,Nr_atoms + call Ewald_Real_Space_latte(Coulomb_Pot_Real_I,J,RXYZ,Box, & + dq_v,Hubbard_U,Element_Pointer,Nr_atoms,Coulomb_acc,nebcoul,totnebcoul,HDIM,Max_Nr_Neigh,Nr_elem) + Coulomb_Pot_Real(J) = Coulomb_Pot_Real_I + enddo + !$OMP END PARALLEL DO - implicit none - integer, intent(in) :: Nr_atoms, HDIM, Nocc,Max_Nr_Neigh,m_rec,Nr_elem,NUMRANK - real(dp), parameter :: ONE = 1.D0, TWO = 2.D0, ZERO = 0.D0 - real(dp), intent(in) :: Coulomb_acc, FelTol, beta, mu - real(dp), intent(in) :: RXYZ(3,Nr_atoms), Box(3,3) - real(dp), intent(in) :: Res(Nr_atoms) - integer, intent(in) :: Hinxlist(HDIM),Element_Pointer(Nr_atoms) - real(dp), intent(in) :: Hubbard_U(Nr_elem) - type(bml_matrix_t), intent(inout) :: HO_bml, S_bml, Z_bml, Inv_bml(m_rec), KK0_bml - real(dp) :: K0Res(Nr_atoms) - type(bml_matrix_t),intent(inout) :: H1_bml, X_bml,Y_bml,DO_bml,D1_bml - real(dp), intent(in) :: threshold - integer, intent(in) :: LMAX - character(20) :: bml_type - integer, intent(in) :: totnebcoul(Nr_atoms), nebcoul(4,Max_Nr_Neigh,Nr_atoms) - real(dp) :: Coulomb_Pot_Real(Nr_atoms), Coulomb_Pot(Nr_atoms) - real(dp) :: Coulomb_Pot_Real_I, Coulomb_Pot_k(Nr_atoms),dq_dv(Nr_atoms) - real(dp) :: Coulomb_Pot_Real_dq_v(Nr_atoms), Coulomb_Pot_dq_v(Nr_atoms) - real(dp) :: dq_v(Nr_atoms) - real(dp), intent(inout) :: KRes(Nr_atoms) - integer :: I,J,K,It,N,MN - integer, intent(out) :: L - real(dp) :: Fel, proj_tmp, start, finish - real(dp) :: vi(Nr_atoms,LMAX), fi(Nr_atoms,LMAX), v(Nr_atoms) - real(dp) :: dr(Nr_Atoms), IdentRes(Nr_atoms) - real(dp), allocatable :: O(:,:),M(:,:),row1(:),row2(:),row_NA(:) - type(bml_matrix_t) :: KK0T_bml, K0Res_bml, Res_bml, ZT_bml, T_bml - - call timer_prg_init() - bml_type = bml_get_type(HO_bml) - N = bml_get_N(HO_bml) - MN = bml_get_M(HO_bml) - allocate(row1(HDIM)); allocate(row2(HDIM)); allocate(row_NA(Nr_atoms)) - call bml_zero_matrix('csr',bml_element_real,dp,Nr_atoms,Nr_atoms,KK0T_bml) - call bml_zero_matrix('csr',bml_element_real,dp,Nr_atoms,Nr_atoms,K0Res_bml) - call bml_zero_matrix('csr',bml_element_real,dp,Nr_atoms,Nr_atoms,Res_bml) - call bml_zero_matrix(bml_type,bml_element_real,dp,N,MN,ZT_bml) - - call bml_transpose(Z_bml,ZT_bml) - - ! K0Res = KK0*Res temporary for matrix-vector multiplication - call bml_set_row(Res_bml,1,Res,threshold) - call bml_transpose(KK0_bml,KK0T_bml) - call bml_multiply(Res_bml,KK0T_bml,K0Res_bml,1.0_dp,0.0_dp,threshold) - call bml_get_row(K0Res_bml,1,row_NA) - K0Res = row_NA - dr = K0Res - - I = 0 - Fel = 1.D0 - do while ((Fel > FelTol).AND.(I < (LMAX)).AND.(I < NUMRANK)) !! Fel = "Error" in Swedish - I = I + 1 - vi(:,I) = dr/norm2(dr) - do J = 1,I-1 - vi(:,I) = vi(:,I) - dot_product(vi(:,I),vi(:,J))*vi(:,J) !! Orthogonalized v_i as in Eq. (42) Ref. [*] - enddo - vi(:,I) = vi(:,I)/norm2(vi(:,I)) - v(:) = vi(:,I) ! v_i + call Ewald_k_Space_latte(Coulomb_Pot_k,RXYZ,Box,dq_v,Nr_atoms,Coulomb_acc,Max_Nr_Neigh) + Coulomb_Pot_dq_v = Coulomb_Pot_Real+Coulomb_Pot_k + call prg_timer_stop(1,1) - ! Compute H1 = H(v) - dq_v = v - call prg_timer_start(1) - !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(J,Coulomb_Pot_Real_I) - do J = 1,Nr_atoms - call Ewald_Real_Space_latte(Coulomb_Pot_Real_I,J,RXYZ,Box, & - dq_v,Hubbard_U,Element_Pointer,Nr_atoms,Coulomb_acc,nebcoul,totnebcoul,HDIM,Max_Nr_Neigh,Nr_elem) - Coulomb_Pot_Real(J) = Coulomb_Pot_Real_I - enddo - !$OMP END PARALLEL DO - - call Ewald_k_Space_latte(Coulomb_Pot_k,RXYZ,Box,dq_v,Nr_atoms,Coulomb_acc,Max_Nr_Neigh) - Coulomb_Pot_dq_v = Coulomb_Pot_Real+Coulomb_Pot_k - call prg_timer_stop(1,1) + call bml_deallocate(H1_bml) + call bml_zero_matrix(bml_type,bml_element_real,dp,HDIM,MN,H1_bml) - call bml_deallocate(H1_bml) - call bml_zero_matrix(bml_type,bml_element_real,dp,HDIM,MN,H1_bml) + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(It,K) + do It = 1,Nr_atoms-1 + do K = Hinxlist(It)+1,Hinxlist(It+1) + row1(K) = Hubbard_U(Element_Pointer(It))*dq_v(It) + Coulomb_Pot_dq_v(It) + enddo + enddo + !$OMP END PARALLEL DO + do K = Hinxlist(Nr_atoms)+1,HDIM + row1(K) = Hubbard_U(Element_Pointer(Nr_atoms))*dq_v(Nr_atoms) + Coulomb_Pot_dq_v(Nr_atoms) + enddo - !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(It,K) - do It = 1,Nr_atoms-1 - do K = Hinxlist(It)+1,Hinxlist(It+1) - row1(K) = Hubbard_U(Element_Pointer(It))*dq_v(It) + Coulomb_Pot_dq_v(It) - enddo - enddo - !$OMP END PARALLEL DO - do K = Hinxlist(Nr_atoms)+1,HDIM - row1(K) = Hubbard_U(Element_Pointer(Nr_atoms))*dq_v(Nr_atoms) + Coulomb_Pot_dq_v(Nr_atoms) - enddo - - ! H1 = 1/2(S*H1+H1*S) - call bml_set_diagonal(H1_bml,row1,threshold) - call bml_multiply(S_bml,H1_bml,X_bml,1.0_dp,0.0_dp,threshold) - call bml_multiply(H1_bml,S_bml,X_bml,0.5_dp,0.5_dp,threshold) - call bml_copy(X_bml,H1_bml) - - ! H1 = Z^T H1 Z - call bml_multiply(ZT_bml,H1_bml,X_bml,1.0_dp,0.0_dp,threshold) - call bml_multiply(X_bml,Z_bml,H1_bml,1.0_dp,0.0_dp,threshold) - - ! Compute D1 = F_FD(HO_bml + eps*H1_bml)/eps at eps = 0 - call prg_implicit_fermi_first_order_response(HO_bml,H1_bml,DO_bml,D1_bml,Inv_bml, & - m_rec, mu, beta, real(nocc,dp), threshold) - - ! D1 = Z D1 Z^T - call bml_multiply(Z_bml,D1_bml,Y_bml,2.0_dp,0.0_dp,threshold) - call bml_multiply(Y_bml,ZT_bml,D1_bml,1.0_dp,0.0_dp,threshold) - - ! Compute dq/dv - call bml_multiply(D1_bml,S_bml,X_bml, 1.0_dp,0.0_dp,threshold) - call bml_get_diagonal(X_bml,row1) - !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(It,K) - do It = 1, Nr_atoms-1 - dq_dv(It) = 0 - do K = Hinxlist(It)+1,Hinxlist(It+1) + ! H1 = 1/2(S*H1+H1*S) + call bml_set_diagonal(H1_bml,row1,threshold) + call bml_multiply(S_bml,H1_bml,X_bml,1.0_dp,0.0_dp,threshold) + call bml_multiply(H1_bml,S_bml,X_bml,0.5_dp,0.5_dp,threshold) + call bml_copy(X_bml,H1_bml) + + ! H1 = Z^T H1 Z + call bml_multiply(ZT_bml,H1_bml,X_bml,1.0_dp,0.0_dp,threshold) + call bml_multiply(X_bml,Z_bml,H1_bml,1.0_dp,0.0_dp,threshold) + + ! Compute D1 = F_FD(HO_bml + eps*H1_bml)/eps at eps = 0 + call prg_implicit_fermi_first_order_response(HO_bml,H1_bml,DO_bml,D1_bml,Inv_bml, & + m_rec, mu, beta, real(nocc,dp), threshold) + + ! D1 = Z D1 Z^T + call bml_multiply(Z_bml,D1_bml,Y_bml,2.0_dp,0.0_dp,threshold) + call bml_multiply(Y_bml,ZT_bml,D1_bml,1.0_dp,0.0_dp,threshold) + + ! Compute dq/dv + call bml_multiply(D1_bml,S_bml,X_bml, 1.0_dp,0.0_dp,threshold) + call bml_get_diagonal(X_bml,row1) + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(It,K) + do It = 1, Nr_atoms-1 + dq_dv(It) = 0 + do K = Hinxlist(It)+1,Hinxlist(It+1) dq_dv(It) = dq_dv(It) + row1(K) - enddo - enddo - !$OMP END PARALLEL DO - dq_dv(Nr_atoms) = 0 - do K = Hinxlist(Nr_atoms)+1,HDIM - dq_dv(Nr_atoms) = dq_dv(Nr_atoms) + row1(K) + enddo + enddo + !$OMP END PARALLEL DO + dq_dv(Nr_atoms) = 0 + do K = Hinxlist(Nr_atoms)+1,HDIM + dq_dv(Nr_atoms) = dq_dv(Nr_atoms) + row1(K) + enddo + + dr = dq_dv - v + ! fi = K0(dq_dv - v) + call bml_set_row(Res_bml,1,dr,1.0_dp*1e-10) + call bml_multiply(Res_bml,KK0T_bml,K0Res_bml,1.0_dp,0.0_dp,1.0_dp*1e-10) + call bml_get_row(K0Res_bml,1,row_NA) + dr = row_NA + fi(:,I) = dr + + L = I + allocate(O(L,L), M(L,L)) + do K = 1,L + do J = 1,L + O(K,J) = dot_product(fi(:,K),fi(:,J)) ! O_KJ = < fv_i(K) | fv_i(J) > see below Eq. (31) + enddo + enddo + call Invert(O,M,L) ! M = O^(-1) + IdentRes = 0.D0*K0Res + KRes = 0.D0 + do K = 1,L + do J = 1,L + proj_tmp = M(K,J)*dot_product(fi(:,J),K0Res) + IdentRes = IdentRes + proj_tmp*fi(:,K) + KRes = KRes + proj_tmp*vi(:,K) !! KRes becomes the rank-L approximate of (K0*J)^(-1)*K0*(q[n]-n) + enddo + enddo + Fel = norm2(IdentRes-K0Res)/norm2(IdentRes) !! RELATIVE RESIDUAL ERROR ESTIMATE Eq. (48) Ref. [*] + write(*,*) '# I, L, Fel = ',I,L,Fel !! Fel goes down with L + deallocate(O, M) + enddo - dr = dq_dv - v - ! fi = K0(dq_dv - v) - call bml_set_row(Res_bml,1,dr,threshold) - call bml_multiply(Res_bml,KK0T_bml,K0Res_bml,1.0_dp,0.0_dp,threshold) + deallocate(row1);deallocate(row2);deallocate(row_NA) + call bml_deallocate(KK0T_bml) + call bml_deallocate(K0Res_bml) + call bml_deallocate(Res_bml) + call bml_deallocate(ZT_bml) + call prg_timer_shutdown() + + end subroutine prg_kernel_multirank_latte + + ! Above routine but for development code + subroutine prg_kernel_multirank(KRes,KK0_bml,Res,FelTol,L,LMAX,HO_bml,mu,beta,RX,RY,RZ,LBox,Hubbard_U,Element_Type, & + Nr_atoms,HDIM,Max_Nr_Neigh,Coulomb_acc,TIMERATIO,nnRx,nnRy,nnRz,nrnnlist,nnType,H_INDEX_START,H_INDEX_END, & + S_bml,Z_bml,Nocc,Znuc,Inv_bml,H1_bml,X_bml,Y_bml,DO_bml,D1_bml,m_rec,threshold) + + !! Res = q[n] - n + !! KK0 is preconditioner + !! KRes rank-L approximation of (K0*J)^(-1)*K0*(q[n]-n) with (K0*J)^(-1) as in Eq. (41) in Ref. [*] + + implicit none + integer, intent(in) :: Nr_atoms, HDIM, Nocc,Max_Nr_Neigh,m_rec + real(dp), parameter :: ONE = 1.D0, TWO = 2.D0, ZERO = 0.D0 + real(dp), intent(in) :: Coulomb_acc, TIMERATIO, FelTol, beta, mu + real(dp), intent(in) :: RX(Nr_atoms), RY(Nr_atoms), RZ(Nr_atoms), LBox(3) + real(dp), intent(in) :: Res(Nr_atoms) + integer, intent(in) :: H_INDEX_START(Nr_atoms), H_INDEX_END(Nr_atoms) + real(dp), intent(in) :: Znuc(Nr_atoms), Hubbard_U(Nr_atoms) + type(bml_matrix_t), intent(in) :: HO_bml, S_bml, Z_bml, Inv_bml(m_rec), KK0_bml + real(dp) :: K0Res(Nr_atoms) + type(bml_matrix_t),intent(inout) :: H1_bml, X_bml,Y_bml,DO_bml,D1_bml + real(dp), intent(in) :: threshold + integer, intent(in) :: LMAX + character(10), intent(in) :: Element_Type(Nr_atoms) + integer, intent(in) :: nrnnlist(Nr_atoms), nnType(Nr_atoms,Max_Nr_Neigh) + real(dp), intent(in) :: nnRx(Nr_atoms,Max_Nr_Neigh) + real(dp), intent(in) :: nnRy(Nr_atoms,Max_Nr_Neigh), nnRz(Nr_atoms,Max_Nr_Neigh) + real(dp) :: Coulomb_Pot_Real(Nr_atoms), Coulomb_Pot(Nr_atoms) + real(dp) :: Coulomb_Pot_Real_I, Coulomb_Pot_k(Nr_atoms),dq_dv(Nr_atoms) + real(dp) :: Coulomb_Pot_Real_dq_v(Nr_atoms), Coulomb_Pot_dq_v(Nr_atoms) + real(dp) :: Coulomb_Force_Real_I(3),Coulomb_Force_k(3,Nr_atoms) + real(dp) :: dq_v(Nr_atoms) + real(dp), intent(inout) :: KRes(Nr_atoms) + integer :: I,J,K,It + integer, intent(out) :: L + real(dp) :: Fel, proj_tmp + real(dp) :: vi(Nr_atoms,LMAX), fi(Nr_atoms,LMAX), v(Nr_atoms) + real(dp) :: dr(Nr_Atoms), IdentRes(Nr_atoms) + real(dp), allocatable :: O(:,:),M(:,:),row1(:),row2(:),row_NA(:) + type(bml_matrix_t) :: KK0T_bml, K0Res_bml, Res_bml + + allocate(row1(HDIM)); allocate(row2(HDIM)); allocate(row_NA(Nr_atoms)) + call bml_zero_matrix("ellpack",bml_element_real,dp,Nr_atoms,Nr_atoms,KK0T_bml) + call bml_zero_matrix("ellpack",bml_element_real,dp,Nr_atoms,Nr_atoms,K0Res_bml) + call bml_zero_matrix("ellpack",bml_element_real,dp,Nr_atoms,Nr_atoms,Res_bml) + + ! K0Res = KK0*Res temporary for matrix-vector multiplication + call bml_transpose(KK0_bml,KK0T_bml) + call bml_set_row(Res_bml,1,Res,ONE*1e-14) + call bml_multiply(Res_bml,KK0T_bml,K0Res_bml,1.0_dp,0.0_dp,ONE*1e-14) call bml_get_row(K0Res_bml,1,row_NA) - dr = row_NA - fi(:,I) = dr - - L = I - allocate(O(L,L), M(L,L)) - do K = 1,L - do J = 1,L - O(K,J) = dot_product(fi(:,K),fi(:,J)) ! O_KJ = < fv_i(K) | fv_i(J) > see below Eq. (31) - enddo - enddo - call Invert(O,M,L) ! M = O^(-1) - IdentRes = 0.D0*K0Res - KRes = 0.D0 - do K = 1,L - do J = 1,L - proj_tmp = M(K,J)*dot_product(fi(:,J),K0Res) - IdentRes = IdentRes + proj_tmp*fi(:,K) - KRes = KRes + proj_tmp*vi(:,K) !! KRes becomes the rank-L approximate of (K0*J)^(-1)*K0*(q[n]-n) - enddo - enddo - Fel = norm2(IdentRes-K0Res)/norm2(IdentRes) !! RELATIVE RESIDUAL ERROR ESTIMATE Eq. (48) Ref. [*] - write(*,*) '# I, L, Fel = ',I,L,Fel !! Fel goes down with L - deallocate(O, M) + K0Res = row_NA + dr = K0Res + + I = 0 + Fel = 1.D0 + do while ((Fel > FelTol).AND.(I < (LMAX))) !! Fel = "Error" in Swedish + I = I + 1 + vi(:,I) = dr/norm2(dr) + do J = 1,I-1 + vi(:,I) = vi(:,I) - dot_product(vi(:,I),vi(:,J))*vi(:,J) !! Orthogonalized v_i as in Eq. (42) Ref. [*] + enddo + vi(:,I) = vi(:,I)/norm2(vi(:,I)) + v(:) = vi(:,I) ! v_i - enddo + dq_v = v - deallocate(row1);deallocate(row2);deallocate(row_NA) - call bml_deallocate(KK0T_bml) - call bml_deallocate(K0Res_bml) - call bml_deallocate(Res_bml) - call bml_deallocate(ZT_bml) - call prg_timer_shutdown() + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(J,Coulomb_Pot_Real_I,Coulomb_Force_Real_I) + do J = 1,Nr_atoms + call Ewald_Real_Space(Coulomb_Pot_Real_I,Coulomb_Force_Real_I,J,RX,RY,RZ,LBox, & + dq_v,Hubbard_U,Element_Type,Nr_atoms,Coulomb_acc,TIMERATIO,nnRx,nnRy,nnRz,nrnnlist,nnType,HDIM,Max_Nr_Neigh) + Coulomb_Pot_Real(J) = Coulomb_Pot_Real_I + enddo + !$OMP END PARALLEL DO - end subroutine prg_kernel_multirank_latte + call Ewald_k_Space(Coulomb_Pot_k,Coulomb_Force_k,RX,RY,RZ,LBox,dq_v,Nr_atoms,Coulomb_acc,TIMERATIO,Max_Nr_Neigh) + Coulomb_Pot_dq_v = Coulomb_Pot_Real+Coulomb_Pot_k - ! Above routine but for development code - subroutine prg_kernel_multirank(KRes,KK0_bml,Res,FelTol,L,LMAX,HO_bml,mu,beta,RX,RY,RZ,LBox,Hubbard_U,Element_Type, & - Nr_atoms,HDIM,Max_Nr_Neigh,Coulomb_acc,TIMERATIO,nnRx,nnRy,nnRz,nrnnlist,nnType,H_INDEX_START,H_INDEX_END, & - S_bml,Z_bml,Nocc,Znuc,Inv_bml,H1_bml,X_bml,Y_bml,DO_bml,D1_bml,m_rec,threshold) + row1 = 0.0_dp + do J = 1,HDIM + call bml_set_row(H1_bml,J,row1,threshold) + enddo -!! Res = q[n] - n -!! KK0 is preconditioner -!! KRes rank-L approximation of (K0*J)^(-1)*K0*(q[n]-n) with (K0*J)^(-1) as in Eq. (41) in Ref. [*] + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(J,K) + do J = 1,Nr_atoms + do K = H_INDEX_START(J),H_INDEX_END(J) + row1(K) = Hubbard_U(J)*dq_v(J) + Coulomb_Pot_dq_v(J) + enddo + enddo + !$OMP END PARALLEL DO - implicit none - integer, intent(in) :: Nr_atoms, HDIM, Nocc,Max_Nr_Neigh,m_rec - real(dp), parameter :: ONE = 1.D0, TWO = 2.D0, ZERO = 0.D0 - real(dp), intent(in) :: Coulomb_acc, TIMERATIO, FelTol, beta, mu - real(dp), intent(in) :: RX(Nr_atoms), RY(Nr_atoms), RZ(Nr_atoms), LBox(3) - real(dp), intent(in) :: Res(Nr_atoms) - integer, intent(in) :: H_INDEX_START(Nr_atoms), H_INDEX_END(Nr_atoms) - real(dp), intent(in) :: Znuc(Nr_atoms), Hubbard_U(Nr_atoms) - type(bml_matrix_t), intent(in) :: HO_bml, S_bml, Z_bml, Inv_bml(m_rec), KK0_bml - real(dp) :: K0Res(Nr_atoms) - type(bml_matrix_t),intent(inout) :: H1_bml, X_bml,Y_bml,DO_bml,D1_bml - real(dp), intent(in) :: threshold - integer, intent(in) :: LMAX - character(10), intent(in) :: Element_Type(Nr_atoms) - integer, intent(in) :: nrnnlist(Nr_atoms), nnType(Nr_atoms,Max_Nr_Neigh) - real(dp), intent(in) :: nnRx(Nr_atoms,Max_Nr_Neigh) - real(dp), intent(in) :: nnRy(Nr_atoms,Max_Nr_Neigh), nnRz(Nr_atoms,Max_Nr_Neigh) - real(dp) :: Coulomb_Pot_Real(Nr_atoms), Coulomb_Pot(Nr_atoms) - real(dp) :: Coulomb_Pot_Real_I, Coulomb_Pot_k(Nr_atoms),dq_dv(Nr_atoms) - real(dp) :: Coulomb_Pot_Real_dq_v(Nr_atoms), Coulomb_Pot_dq_v(Nr_atoms) - real(dp) :: Coulomb_Force_Real_I(3),Coulomb_Force_k(3,Nr_atoms) - real(dp) :: dq_v(Nr_atoms) - real(dp), intent(inout) :: KRes(Nr_atoms) - integer :: I,J,K,It - integer, intent(out) :: L - real(dp) :: Fel, proj_tmp - real(dp) :: vi(Nr_atoms,LMAX), fi(Nr_atoms,LMAX), v(Nr_atoms) - real(dp) :: dr(Nr_Atoms), IdentRes(Nr_atoms) - real(dp), allocatable :: O(:,:),M(:,:),row1(:),row2(:),row_NA(:) - type(bml_matrix_t) :: KK0T_bml, K0Res_bml, Res_bml - - allocate(row1(HDIM)); allocate(row2(HDIM)); allocate(row_NA(Nr_atoms)) - call bml_zero_matrix("ellpack",bml_element_real,dp,Nr_atoms,Nr_atoms,KK0T_bml) - call bml_zero_matrix("ellpack",bml_element_real,dp,Nr_atoms,Nr_atoms,K0Res_bml) - call bml_zero_matrix("ellpack",bml_element_real,dp,Nr_atoms,Nr_atoms,Res_bml) - - ! K0Res = KK0*Res temporary for matrix-vector multiplication - call bml_transpose(KK0_bml,KK0T_bml) - call bml_set_row(Res_bml,1,Res,ONE*1e-14) - call bml_multiply(Res_bml,KK0T_bml,K0Res_bml,1.0_dp,0.0_dp,ONE*1e-14) - call bml_get_row(K0Res_bml,1,row_NA) - K0Res = row_NA - dr = K0Res - - I = 0 - Fel = 1.D0 - do while ((Fel > FelTol).AND.(I < (LMAX))) !! Fel = "Error" in Swedish - I = I + 1 - vi(:,I) = dr/norm2(dr) - do J = 1,I-1 - vi(:,I) = vi(:,I) - dot_product(vi(:,I),vi(:,J))*vi(:,J) !! Orthogonalized v_i as in Eq. (42) Ref. [*] - enddo - vi(:,I) = vi(:,I)/norm2(vi(:,I)) - v(:) = vi(:,I) ! v_i + call bml_set_diagonal(H1_bml,row1,threshold) + call bml_multiply(S_bml,H1_bml,D1_bml,1.0_dp,0.0_dp,threshold) + call bml_multiply(H1_bml,S_bml,D1_bml,0.5_dp,0.5_dp,threshold) + call bml_copy(D1_bml,H1_bml) - dq_v = v + call bml_transpose(Z_bml,X_bml) + call bml_multiply(X_bml,H1_bml,D1_bml,1.0_dp,0.0_dp,threshold) + call bml_multiply(D1_bml,Z_bml,H1_bml,1.0_dp,0.0_dp,threshold) - !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(J,Coulomb_Pot_Real_I,Coulomb_Force_Real_I) - do J = 1,Nr_atoms - call Ewald_Real_Space(Coulomb_Pot_Real_I,Coulomb_Force_Real_I,J,RX,RY,RZ,LBox, & - dq_v,Hubbard_U,Element_Type,Nr_atoms,Coulomb_acc,TIMERATIO,nnRx,nnRy,nnRz,nrnnlist,nnType,HDIM,Max_Nr_Neigh) - Coulomb_Pot_Real(J) = Coulomb_Pot_Real_I - enddo - !$OMP END PARALLEL DO - - call Ewald_k_Space(Coulomb_Pot_k,Coulomb_Force_k,RX,RY,RZ,LBox,dq_v,Nr_atoms,Coulomb_acc,TIMERATIO,Max_Nr_Neigh) - Coulomb_Pot_dq_v = Coulomb_Pot_Real+Coulomb_Pot_k - - row1 = 0.0_dp - do J = 1,HDIM - call bml_set_row(H1_bml,J,row1,threshold) - enddo + call prg_implicit_fermi_first_order_response(HO_bml,H1_bml,DO_bml,D1_bml,Inv_bml, & + m_rec, mu, beta, real(nocc,dp), threshold) - !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(J,K) - do J = 1,Nr_atoms - do K = H_INDEX_START(J),H_INDEX_END(J) - row1(K) = Hubbard_U(J)*dq_v(J) + Coulomb_Pot_dq_v(J) - enddo - enddo - !$OMP END PARALLEL DO - - call bml_set_diagonal(H1_bml,row1,threshold) - call bml_multiply(S_bml,H1_bml,D1_bml,1.0_dp,0.0_dp,threshold) - call bml_multiply(H1_bml,S_bml,D1_bml,0.5_dp,0.5_dp,threshold) - call bml_copy(D1_bml,H1_bml) - - call bml_transpose(Z_bml,X_bml) - call bml_multiply(X_bml,H1_bml,D1_bml,1.0_dp,0.0_dp,threshold) - call bml_multiply(D1_bml,Z_bml,H1_bml,1.0_dp,0.0_dp,threshold) - - call prg_implicit_fermi_first_order_response(HO_bml,H1_bml,DO_bml,D1_bml,Inv_bml, & - m_rec, mu, beta, real(nocc,dp), threshold) - - call bml_transpose(Z_bml,X_bml) - call bml_multiply(Z_bml,D1_bml,Y_bml,2.0_dp,0.0_dp,threshold) - call bml_multiply(Y_bml,X_bml,D1_bml,1.0_dp,0.0_dp,threshold) - - !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(It,K,row1,row2) - do It = 1, Nr_atoms - dq_dv(It) = 0 - do K = H_INDEX_START(It), H_INDEX_END(It) - call bml_get_row(S_bml,K,row1) - call bml_get_row(D1_bml,K,row2) - dq_dv(It) = dq_dv(It) + dot_product(row1,row2) + call bml_transpose(Z_bml,X_bml) + call bml_multiply(Z_bml,D1_bml,Y_bml,2.0_dp,0.0_dp,threshold) + call bml_multiply(Y_bml,X_bml,D1_bml,1.0_dp,0.0_dp,threshold) + + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(It,K,row1,row2) + do It = 1, Nr_atoms + dq_dv(It) = 0 + do K = H_INDEX_START(It), H_INDEX_END(It) + call bml_get_row(S_bml,K,row1) + call bml_get_row(D1_bml,K,row2) + dq_dv(It) = dq_dv(It) + dot_product(row1,row2) + enddo enddo - enddo - !$OMP END PARALLEL DO + !$OMP END PARALLEL DO + + dr = dq_dv - v + ! fi = K0(dq_dv - v) + call bml_set_row(Res_bml,1,dr,threshold) + call bml_multiply(Res_bml,KK0T_bml,K0Res_bml,1.0_dp,0.0_dp,threshold) + call bml_get_row(K0Res_bml,1,row_NA) + dr = row_NA + fi(:,I) = dr + + L = I + allocate(O(L,L), M(L,L)) + do K = 1,L + do J = 1,L + O(K,J) = dot_product(fi(:,K),fi(:,J)) ! O_KJ = < fv_i(K) | fv_i(J) > see below Eq. (31) + enddo + enddo + call Invert(O,M,L) ! M = O^(-1) + IdentRes = 0.D0*K0Res + KRes = 0.D0 + do K = 1,L + do J = 1,L + proj_tmp = M(K,J)*dot_product(fi(:,J),K0Res) + IdentRes = IdentRes + proj_tmp*fi(:,K) + KRes = KRes + proj_tmp*vi(:,K) !! KRes becomes the rank-L approximate of (K0*J)^(-1)*K0*(q[n]-n) + enddo + enddo + Fel = norm2(IdentRes-K0Res)/norm2(IdentRes) !! RELATIVE RESIDUAL ERROR ESTIMATE Eq. (48) Ref. [*] + write(*,*) '# I, L, Fel = ',I,L,Fel !! Fel goes down with L + deallocate(O, M) - dr = dq_dv - v - ! fi = K0(dq_dv - v) - call bml_set_row(Res_bml,1,dr,threshold) - call bml_multiply(Res_bml,KK0T_bml,K0Res_bml,1.0_dp,0.0_dp,threshold) - call bml_get_row(K0Res_bml,1,row_NA) - dr = row_NA - fi(:,I) = dr - - L = I - allocate(O(L,L), M(L,L)) - do K = 1,L - do J = 1,L - O(K,J) = dot_product(fi(:,K),fi(:,J)) ! O_KJ = < fv_i(K) | fv_i(J) > see below Eq. (31) - enddo enddo - call Invert(O,M,L) ! M = O^(-1) - IdentRes = 0.D0*K0Res - KRes = 0.D0 - do K = 1,L - do J = 1,L - proj_tmp = M(K,J)*dot_product(fi(:,J),K0Res) - IdentRes = IdentRes + proj_tmp*fi(:,K) - KRes = KRes + proj_tmp*vi(:,K) !! KRes becomes the rank-L approximate of (K0*J)^(-1)*K0*(q[n]-n) - enddo - enddo - Fel = norm2(IdentRes-K0Res)/norm2(IdentRes) !! RELATIVE RESIDUAL ERROR ESTIMATE Eq. (48) Ref. [*] - write(*,*) '# I, L, Fel = ',I,L,Fel !! Fel goes down with L - deallocate(O, M) - - enddo - deallocate(row1);deallocate(row2);deallocate(row_NA) - call bml_deallocate(KK0T_bml) - call bml_deallocate(K0Res_bml) - call bml_deallocate(Res_bml) + deallocate(row1);deallocate(row2);deallocate(row_NA) + call bml_deallocate(KK0T_bml) + call bml_deallocate(K0Res_bml) + call bml_deallocate(Res_bml) end subroutine prg_kernel_multirank -!> Compute full inverse Jacobian of q[n]-n (for LATTE) -!! \param KK The inverse Jacobian. -!! \param DO_bml Orthogonalized density matrix. -!! \param mu0 The chemical potiential. -!! \param RXYZ Nuclear coordinates. -!! \param Box Box dimensions. -!! \param Hubbard_U Hubbard U list. -!! \param Element_Pointer List to keep track of elements. -!! \param Nr_atoms The number of atoms. -!! \param HDIM Hamiltonian matrix dimension. -!! \param Max_Nr_Neigh Max neighbours for Ewald. -!! \param Coulomb_acc Coulomb accuracy -!! \param Hinxlist List to keep track of atomic positions in the Hamiltonian. -!! \param S_bml The S matrix. -!! \param Z_bml, The Z matrix. -!! \param Inv_bml, Inverses generated by prg_implicit_fermi_save_inverse. -!! \param HO_bml, Orthogonalized Hamiltonian matrix. -!! \param D1_bml, H1_bml, Y_bml, X_bml Auxillary matrices. -!! \param Nocc Occupation. -!! \param m_rec Number of recursion steps. -!! \param threshold Threshold value for matrix truncation. -!! \param beta Scaled inverse temperature. -!! \param Nr_elem Number of elements in Hubbard list. -subroutine prg_full_kernel_latte(KK,DO_bml,mu0,RXYZ,Box,Hubbard_U, & -Element_Pointer, Nr_atoms,HDIM, Max_Nr_Neigh,Coulomb_acc, & -Hinxlist,S_bml,Z_bml,Inv_bml,D1_bml,H1_bml,HO_bml,Y_bml,X_bml, & -Nocc,m_rec,threshold,beta,Nr_elem) - -use bml - -implicit none -integer, parameter :: PREC = 8, dp = kind(1.0d0) -integer, intent(in) :: Nr_atoms, Nr_elem,HDIM, Nocc, Max_Nr_Neigh,m_rec -real(PREC), parameter :: ONE = 1.D0, TWO = 2.D0, ZERO = 0.D0 -real(PREC), parameter :: kB = 8.61739d-5 ! eV/K, kB = 6.33366256e-6 Ry/K, kB = 3.166811429e-6 Ha/K -real(PREC), intent(in) :: Coulomb_acc, threshold,beta -real(PREC) :: v(Nr_atoms) -real(PREC), intent(in) :: RXYZ(3,Nr_atoms),Box(3,3) -integer, intent(in) :: Hinxlist(HDIM) -real(PREC), intent(in) :: Hubbard_U(Nr_elem) -type(bml_matrix_t), intent(in) :: Inv_bml(m_rec) -type(bml_matrix_t), intent(in) :: S_bml,Z_bml,HO_bml -type(bml_matrix_t),intent(inout) :: H1_bml,DO_bml,D1_bml,Y_bml,X_bml -real(PREC), intent(inout) :: mu0 -integer, intent(in) :: Element_Pointer(Nr_atoms) -real(PREC) :: Coulomb_Pot_Real(Nr_atoms), Coulomb_Pot(Nr_atoms) -real(PREC) :: Coulomb_Pot_Real_I, Coulomb_Pot_k(Nr_atoms) -real(PREC) :: Coulomb_Pot_Real_dq_v(Nr_atoms), Coulomb_Pot_dq_v(Nr_atoms) -real(PREC) :: dq_v(Nr_atoms) -real(PREC) :: dq_dv(Nr_atoms), err,tol -real(PREC), intent(out) :: KK(Nr_atoms,Nr_atoms) -integer :: I,J,K, ITER, mm,It,N,MN -real(PREC),allocatable :: row1(:),row2(:),JJ(:,:) -type(bml_matrix_t) :: ZT_bml, R_bml, JJI_bml, JJ_bml, tmp_bml -character(20) :: bml_type - - bml_type = bml_get_type(HO_bml) - N = bml_get_N(HO_bml) - MN = bml_get_M(HO_bml) - call bml_zero_matrix(bml_type,bml_element_real,dp,N,MN,ZT_bml) - call bml_transpose(Z_bml,ZT_bml) - allocate(row1(HDIM)); allocate(row2(HDIM)); allocate(JJ(Nr_atoms,Nr_atoms)) - Coulomb_Pot_dq_v = ZERO - Coulomb_Pot_k = ZERO - dq_v = ZERO - JJ = ZERO - KK = ZERO - - do J = 1,Nr_atoms - dq_v(J) = ONE - !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(I,Coulomb_Pot_Real_I) - do I = 1,Nr_atoms + !> Compute full inverse Jacobian of q[n]-n (for LATTE) + !! \param KK The inverse Jacobian. + !! \param DO_bml Orthogonalized density matrix. + !! \param mu0 The chemical potiential. + !! \param RXYZ Nuclear coordinates. + !! \param Box Box dimensions. + !! \param Hubbard_U Hubbard U list. + !! \param Element_Pointer List to keep track of elements. + !! \param Nr_atoms The number of atoms. + !! \param HDIM Hamiltonian matrix dimension. + !! \param Max_Nr_Neigh Max neighbours for Ewald. + !! \param Coulomb_acc Coulomb accuracy + !! \param Hinxlist List to keep track of atomic positions in the Hamiltonian. + !! \param S_bml The S matrix. + !! \param Z_bml, The Z matrix. + !! \param Inv_bml, Inverses generated by prg_implicit_fermi_save_inverse. + !! \param HO_bml, Orthogonalized Hamiltonian matrix. + !! \param D1_bml, H1_bml, Y_bml, X_bml Auxillary matrices. + !! \param Nocc Occupation. + !! \param m_rec Number of recursion steps. + !! \param threshold Threshold value for matrix truncation. + !! \param beta Scaled inverse temperature. + !! \param Nr_elem Number of elements in Hubbard list. + subroutine prg_full_kernel_latte(KK,DO_bml,mu0,RXYZ,Box,Hubbard_U, & + Element_Pointer, Nr_atoms,HDIM, Max_Nr_Neigh,Coulomb_acc, & + Hinxlist,S_bml,Z_bml,Inv_bml,D1_bml,H1_bml,HO_bml,Y_bml,X_bml, & + Nocc,m_rec,threshold,beta,Nr_elem) + + use bml + + implicit none + integer, parameter :: PREC = 8, dp = kind(1.0d0) + integer, intent(in) :: Nr_atoms, Nr_elem,HDIM, Nocc, Max_Nr_Neigh,m_rec + real(PREC), parameter :: ONE = 1.D0, TWO = 2.D0, ZERO = 0.D0 + real(PREC), parameter :: kB = 8.61739d-5 ! eV/K, kB = 6.33366256e-6 Ry/K, kB = 3.166811429e-6 Ha/K + real(PREC), intent(in) :: Coulomb_acc, threshold,beta + real(PREC) :: v(Nr_atoms) + real(PREC), intent(in) :: RXYZ(3,Nr_atoms),Box(3,3) + integer, intent(in) :: Hinxlist(HDIM) + real(PREC), intent(in) :: Hubbard_U(Nr_elem) + type(bml_matrix_t), intent(in) :: Inv_bml(m_rec) + type(bml_matrix_t), intent(in) :: S_bml,Z_bml,HO_bml + type(bml_matrix_t),intent(inout) :: H1_bml,DO_bml,D1_bml,Y_bml,X_bml + real(PREC), intent(inout) :: mu0 + integer, intent(in) :: Element_Pointer(Nr_atoms) + real(PREC) :: Coulomb_Pot_Real(Nr_atoms), Coulomb_Pot(Nr_atoms) + real(PREC) :: Coulomb_Pot_Real_I, Coulomb_Pot_k(Nr_atoms) + real(PREC) :: Coulomb_Pot_Real_dq_v(Nr_atoms), Coulomb_Pot_dq_v(Nr_atoms) + real(PREC) :: dq_v(Nr_atoms) + real(PREC) :: dq_dv(Nr_atoms), err,tol + real(PREC), intent(out) :: KK(Nr_atoms,Nr_atoms) + integer :: I,J,K, ITER, mm,It,N,MN + real(PREC),allocatable :: row1(:),row2(:),JJ(:,:) + type(bml_matrix_t) :: ZT_bml, R_bml, JJI_bml, JJ_bml, tmp_bml + character(20) :: bml_type + + bml_type = bml_get_type(HO_bml) + N = bml_get_N(HO_bml) + MN = bml_get_M(HO_bml) + call bml_zero_matrix(bml_type,bml_element_real,dp,N,MN,ZT_bml) + call bml_transpose(Z_bml,ZT_bml) + allocate(row1(HDIM)); allocate(row2(HDIM)); allocate(JJ(Nr_atoms,Nr_atoms)) + Coulomb_Pot_dq_v = ZERO + Coulomb_Pot_k = ZERO + dq_v = ZERO + JJ = ZERO + KK = ZERO + + do J = 1,Nr_atoms + dq_v(J) = ONE + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(I,Coulomb_Pot_Real_I) + do I = 1,Nr_atoms call Ewald_Real_Space_Single_latte(Coulomb_Pot_Real_I,I,RXYZ,Box,Nr_elem, & - dq_v,J,Hubbard_U,Element_Pointer,Nr_atoms,Coulomb_acc,HDIM,Max_Nr_Neigh) + dq_v,J,Hubbard_U,Element_Pointer,Nr_atoms,Coulomb_acc,HDIM,Max_Nr_Neigh) Coulomb_Pot_Real(I) = Coulomb_Pot_Real_I - enddo - !$OMP END PARALLEL DO - call Ewald_k_Space_latte(Coulomb_Pot_k,RXYZ,Box,dq_v,Nr_atoms,Coulomb_acc, & - Max_Nr_Neigh) - Coulomb_Pot_dq_v = Coulomb_Pot_Real+Coulomb_Pot_k - - call bml_deallocate(H1_bml) - call bml_zero_matrix(bml_type,bml_element_real,dp,N,MN,H1_bml) - - !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(I,K) - do I = 1,Nr_atoms-1 - do K = Hinxlist(I)+1,Hinxlist(I+1) - row1(K) = Hubbard_U(Element_Pointer(I))*dq_v(I) + Coulomb_Pot_dq_v(I) - enddo - enddo - !$OMP END PARALLEL DO - do K = Hinxlist(Nr_atoms)+1,HDIM - row1(K) = Hubbard_U(Element_Pointer(Nr_atoms))*dq_v(Nr_atoms) + Coulomb_Pot_dq_v(Nr_atoms) - enddo + enddo + !$OMP END PARALLEL DO + call Ewald_k_Space_latte(Coulomb_Pot_k,RXYZ,Box,dq_v,Nr_atoms,Coulomb_acc, & + Max_Nr_Neigh) + Coulomb_Pot_dq_v = Coulomb_Pot_Real+Coulomb_Pot_k + + call bml_deallocate(H1_bml) + call bml_zero_matrix(bml_type,bml_element_real,dp,N,MN,H1_bml) + + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(I,K) + do I = 1,Nr_atoms-1 + do K = Hinxlist(I)+1,Hinxlist(I+1) + row1(K) = Hubbard_U(Element_Pointer(I))*dq_v(I) + Coulomb_Pot_dq_v(I) + enddo + enddo + !$OMP END PARALLEL DO + do K = Hinxlist(Nr_atoms)+1,HDIM + row1(K) = Hubbard_U(Element_Pointer(Nr_atoms))*dq_v(Nr_atoms) + Coulomb_Pot_dq_v(Nr_atoms) + enddo - call bml_set_diagonal(H1_bml,row1,threshold) - call bml_multiply(S_bml,H1_bml,X_bml,1.0_dp,0.0_dp,threshold) - call bml_multiply(H1_bml,S_bml,X_bml,0.5_dp,0.5_dp,threshold) - call bml_copy(X_bml,H1_bml) + call bml_set_diagonal(H1_bml,row1,threshold) + call bml_multiply(S_bml,H1_bml,X_bml,1.0_dp,0.0_dp,threshold) + call bml_multiply(H1_bml,S_bml,X_bml,0.5_dp,0.5_dp,threshold) + call bml_copy(X_bml,H1_bml) - call bml_multiply(ZT_bml,H1_bml,X_bml,1.0_dp,0.0_dp,threshold) - call bml_multiply(X_bml,Z_bml,H1_bml,1.0_dp,0.0_dp,threshold) + call bml_multiply(ZT_bml,H1_bml,X_bml,1.0_dp,0.0_dp,threshold) + call bml_multiply(X_bml,Z_bml,H1_bml,1.0_dp,0.0_dp,threshold) - call prg_implicit_fermi_first_order_response(HO_bml,H1_bml,DO_bml,D1_bml,Inv_bml, & - m_rec,mu0,beta,real(nocc,PREC),threshold) + call prg_implicit_fermi_first_order_response(HO_bml,H1_bml,DO_bml,D1_bml,Inv_bml, & + m_rec,mu0,beta,real(nocc,PREC),threshold) - call bml_multiply(Z_bml,D1_bml,Y_bml,2.0_dp,0.0_dp,threshold) - call bml_multiply(Y_bml,ZT_bml,D1_bml,1.0_dp,0.0_dp,threshold) + call bml_multiply(Z_bml,D1_bml,Y_bml,2.0_dp,0.0_dp,threshold) + call bml_multiply(Y_bml,ZT_bml,D1_bml,1.0_dp,0.0_dp,threshold) - call bml_multiply(D1_bml,S_bml,X_bml,1.0_dp,0.0_dp,threshold) - call bml_get_diagonal(X_bml,row1) - !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(It,K) - do It = 1, Nr_atoms-1 - dq_dv(It) = 0 - do K = Hinxlist(It)+1,Hinxlist(It+1) + call bml_multiply(D1_bml,S_bml,X_bml,1.0_dp,0.0_dp,threshold) + call bml_get_diagonal(X_bml,row1) + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(It,K) + do It = 1, Nr_atoms-1 + dq_dv(It) = 0 + do K = Hinxlist(It)+1,Hinxlist(It+1) dq_dv(It) = dq_dv(It) + row1(K) - enddo - JJ(It,J) = dq_dv(It) - enddo - !$OMP END PARALLEL DO - dq_dv(Nr_atoms) = 0 - do K = Hinxlist(Nr_atoms)+1,HDIM - dq_dv(Nr_atoms) = dq_dv(Nr_atoms) + row1(K) + enddo + JJ(It,J) = dq_dv(It) + enddo + !$OMP END PARALLEL DO + dq_dv(Nr_atoms) = 0 + do K = Hinxlist(Nr_atoms)+1,HDIM + dq_dv(Nr_atoms) = dq_dv(Nr_atoms) + row1(K) + enddo + JJ(Nr_atoms,J) = dq_dv(Nr_atoms) + dq_v = ZERO enddo - JJ(Nr_atoms,J) = dq_dv(Nr_atoms) + + do I = 1,Nr_atoms + JJ(I,I) = JJ(I,I) - ONE + enddo + + call Invert(JJ,KK,Nr_atoms) + + deallocate(row1); deallocate(row2); deallocate(JJ) + call bml_deallocate(ZT_bml) + + end subroutine prg_full_kernel_latte + + !> Compute full inverse Jacobian of q[n]-n (for development code) + !! \param KK The inverse Jacobian. + !! \param DO_bml Orthogonalized density matrix. + !! \param mu0 The chemical potiential. + !! \param RX,RY,RZ Nuclear coordinates. + !! \param Lbox Box dimensions. + !! \param Hubbard_U Hubbard U list. + !! \param Element_Type List to keep track of elements. + !! \param Nr_atoms The number of atoms. + !! \param HDIM Hamiltonian matrix dimension. + !! \param Max_Nr_Neigh Max neighbours for Ewald. + !! \param Coulomb_acc Coulomb accuracy + !! \param TIMERATIO Parameter for Ewald + !! \param nnRx,nnRy,nnRz Neighbour lists. + !! \param nrnnlist Number of neighbours list. + !! \param nnType Refers to original order of atoms. + !! \param H_INDEX_START, H_INDEX_END Lists to keep track of atomic positions in the Hamiltonian. + !! \param S_bml The S matrix. + !! \param Z_bml, The Z matrix. + !! \param Inv_bml, Inverses generated by prg_implicit_fermi_save_inverse. + !! \param HO_bml, Orthogonalized Hamiltonian matrix. + !! \param D1_bml, H1_bml, Y_bml, X_bml Auxillary matrices. + !! \param Nocc Occupation. + !! \param Znuc List of nuclear charges. + !! \param m_rec Number of recursion steps. + !! \param threshold Threshold value for matrix truncation. + !! \param beta Scaled inverse temperature. + !! \param diagonal Auxillary vector. + + subroutine prg_full_kernel(KK,DO_bml,mu0,RX,RY,RZ,LBox,Hubbard_U,Element_Type, & + Nr_atoms,HDIM, Max_Nr_Neigh,Coulomb_acc,TIMERATIO,nnRx,nnRy,nnRz, & + nrnnlist,nnType,H_INDEX_START,H_INDEX_END,S_bml,Z_bml,Inv_bml,D1_bml,H1_bml,HO_bml,Y_bml,X_bml, & + Nocc,Znuc,m_rec,threshold,beta,diagonal) + + use bml + + implicit none + integer, parameter :: PREC = 8, dp = kind(1.0d0) + integer, intent(in) :: Nr_atoms, HDIM, Nocc, Max_Nr_Neigh,m_rec + real(PREC), parameter :: ONE = 1.D0, TWO = 2.D0, ZERO = 0.D0 + real(PREC), parameter :: kB = 8.61739d-5 ! eV/K, kB = 6.33366256e-6 Ry/K, kB = 3.166811429e-6 Ha/K + real(PREC), intent(in) :: Coulomb_acc, TIMERATIO,threshold,beta + real(PREC) :: v(Nr_atoms) + real(PREC), intent(in) :: RX(Nr_atoms), RY(Nr_atoms), RZ(Nr_atoms), LBox(3) + integer, intent(in) :: H_INDEX_START(Nr_atoms), H_INDEX_END(Nr_atoms) + real(PREC), intent(in) :: Znuc(Nr_atoms), Hubbard_U(Nr_atoms) + type(bml_matrix_t), intent(in) :: Inv_bml(m_rec) + type(bml_matrix_t), intent(in) :: S_bml,Z_bml,HO_bml + type(bml_matrix_t),intent(inout) :: H1_bml,DO_bml,D1_bml,Y_bml,X_bml + real(PREC), intent(inout) :: mu0, diagonal(HDIM) + character(10), intent(in) :: Element_Type(Nr_atoms) + integer, intent(in) :: nrnnlist(Nr_atoms), nnType(Nr_atoms,Max_Nr_Neigh) + real(PREC), intent(in) :: nnRx(Nr_atoms,Max_Nr_Neigh) + real(PREC), intent(in) :: nnRy(Nr_atoms,Max_Nr_Neigh), nnRz(Nr_atoms,Max_Nr_Neigh) + real(PREC) :: Coulomb_Pot_Real(Nr_atoms), Coulomb_Pot(Nr_atoms) + real(PREC) :: Coulomb_Pot_Real_I, Coulomb_Pot_k(Nr_atoms) + real(PREC) :: Coulomb_Pot_Real_dq_v(Nr_atoms), Coulomb_Pot_dq_v(Nr_atoms) + real(PREC) :: Coulomb_Force_Real_I(3),Coulomb_Force_k(3,Nr_atoms) + real(PREC) :: dq_v(Nr_atoms) + real(PREC) :: dq_dv(Nr_atoms) + real(PREC), intent(out) :: KK(Nr_atoms,Nr_atoms) + integer :: I,J,K, ITER, mm,It + real(PREC),allocatable :: row1(:),row2(:),JJ(:,:) + + allocate(row1(HDIM)); allocate(row2(HDIM)); allocate(JJ(Nr_atoms,Nr_atoms)) + Coulomb_Pot_dq_v = ZERO + Coulomb_Pot_k = ZERO dq_v = ZERO - enddo - - do I = 1,Nr_atoms - JJ(I,I) = JJ(I,I) - ONE - enddo - - call Invert(JJ,KK,Nr_atoms) - - deallocate(row1); deallocate(row2); deallocate(JJ) - call bml_deallocate(ZT_bml) - -end subroutine prg_full_kernel_latte - -!> Compute full inverse Jacobian of q[n]-n (for development code) -!! \param KK The inverse Jacobian. -!! \param DO_bml Orthogonalized density matrix. -!! \param mu0 The chemical potiential. -!! \param RX,RY,RZ Nuclear coordinates. -!! \param Lbox Box dimensions. -!! \param Hubbard_U Hubbard U list. -!! \param Element_Type List to keep track of elements. -!! \param Nr_atoms The number of atoms. -!! \param HDIM Hamiltonian matrix dimension. -!! \param Max_Nr_Neigh Max neighbours for Ewald. -!! \param Coulomb_acc Coulomb accuracy -!! \param TIMERATIO Parameter for Ewald -!! \param nnRx,nnRy,nnRz Neighbour lists. -!! \param nrnnlist Number of neighbours list. -!! \param nnType Refers to original order of atoms. -!! \param H_INDEX_START, H_INDEX_END Lists to keep track of atomic positions in the Hamiltonian. -!! \param S_bml The S matrix. -!! \param Z_bml, The Z matrix. -!! \param Inv_bml, Inverses generated by prg_implicit_fermi_save_inverse. -!! \param HO_bml, Orthogonalized Hamiltonian matrix. -!! \param D1_bml, H1_bml, Y_bml, X_bml Auxillary matrices. -!! \param Nocc Occupation. -!! \param Znuc List of nuclear charges. -!! \param m_rec Number of recursion steps. -!! \param threshold Threshold value for matrix truncation. -!! \param beta Scaled inverse temperature. -!! \param diagonal Auxillary vector. - -subroutine prg_full_kernel(KK,DO_bml,mu0,RX,RY,RZ,LBox,Hubbard_U,Element_Type, & -Nr_atoms,HDIM, Max_Nr_Neigh,Coulomb_acc,TIMERATIO,nnRx,nnRy,nnRz, & -nrnnlist,nnType,H_INDEX_START,H_INDEX_END,S_bml,Z_bml,Inv_bml,D1_bml,H1_bml,HO_bml,Y_bml,X_bml, & -Nocc,Znuc,m_rec,threshold,beta,diagonal) - -use bml - -implicit none -integer, parameter :: PREC = 8, dp = kind(1.0d0) -integer, intent(in) :: Nr_atoms, HDIM, Nocc, Max_Nr_Neigh,m_rec -real(PREC), parameter :: ONE = 1.D0, TWO = 2.D0, ZERO = 0.D0 -real(PREC), parameter :: kB = 8.61739d-5 ! eV/K, kB = 6.33366256e-6 Ry/K, kB = 3.166811429e-6 Ha/K -real(PREC), intent(in) :: Coulomb_acc, TIMERATIO,threshold,beta -real(PREC) :: v(Nr_atoms) -real(PREC), intent(in) :: RX(Nr_atoms), RY(Nr_atoms), RZ(Nr_atoms), LBox(3) -integer, intent(in) :: H_INDEX_START(Nr_atoms), H_INDEX_END(Nr_atoms) -real(PREC), intent(in) :: Znuc(Nr_atoms), Hubbard_U(Nr_atoms) -type(bml_matrix_t), intent(in) :: Inv_bml(m_rec) -type(bml_matrix_t), intent(in) :: S_bml,Z_bml,HO_bml -type(bml_matrix_t),intent(inout) :: H1_bml,DO_bml,D1_bml,Y_bml,X_bml -real(PREC), intent(inout) :: mu0, diagonal(HDIM) -character(10), intent(in) :: Element_Type(Nr_atoms) -integer, intent(in) :: nrnnlist(Nr_atoms), nnType(Nr_atoms,Max_Nr_Neigh) -real(PREC), intent(in) :: nnRx(Nr_atoms,Max_Nr_Neigh) -real(PREC), intent(in) :: nnRy(Nr_atoms,Max_Nr_Neigh), nnRz(Nr_atoms,Max_Nr_Neigh) -real(PREC) :: Coulomb_Pot_Real(Nr_atoms), Coulomb_Pot(Nr_atoms) -real(PREC) :: Coulomb_Pot_Real_I, Coulomb_Pot_k(Nr_atoms) -real(PREC) :: Coulomb_Pot_Real_dq_v(Nr_atoms), Coulomb_Pot_dq_v(Nr_atoms) -real(PREC) :: Coulomb_Force_Real_I(3),Coulomb_Force_k(3,Nr_atoms) -real(PREC) :: dq_v(Nr_atoms) -real(PREC) :: dq_dv(Nr_atoms) -real(PREC), intent(out) :: KK(Nr_atoms,Nr_atoms) -integer :: I,J,K, ITER, mm,It -real(PREC),allocatable :: row1(:),row2(:),JJ(:,:) - - allocate(row1(HDIM)); allocate(row2(HDIM)); allocate(JJ(Nr_atoms,Nr_atoms)) - Coulomb_Pot_dq_v = ZERO - Coulomb_Pot_k = ZERO - dq_v = ZERO - JJ = ZERO - KK = ZERO - - do J = 1,Nr_atoms - dq_v(J) = ONE - !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(I,Coulomb_Pot_Real_I) - do I = 1,Nr_atoms + JJ = ZERO + KK = ZERO + + do J = 1,Nr_atoms + dq_v(J) = ONE + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(I,Coulomb_Pot_Real_I) + do I = 1,Nr_atoms call Ewald_Real_Space_Single(Coulomb_Pot_Real_I,Coulomb_Force_Real_I,I,RX,RY,RZ,LBox, & - dq_v,J,Hubbard_U,Element_Type,Nr_atoms,Coulomb_acc,TIMERATIO,HDIM,Max_Nr_Neigh) + dq_v,J,Hubbard_U,Element_Type,Nr_atoms,Coulomb_acc,TIMERATIO,HDIM,Max_Nr_Neigh) Coulomb_Pot_Real(I) = Coulomb_Pot_Real_I - enddo - !$OMP END PARALLEL DO - call Ewald_k_Space(Coulomb_Pot_k,Coulomb_Force_k,RX,RY,RZ,LBox,dq_v,Nr_atoms,Coulomb_acc, & - TIMERATIO,Max_Nr_Neigh) - Coulomb_Pot_dq_v = Coulomb_Pot_Real+Coulomb_Pot_k - - diagonal = 0.0_dp - do I = 1,HDIM - call bml_set_row(H1_bml,I,diagonal,threshold) - enddo - - !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(I,K) - do I = 1,Nr_atoms - do K = H_INDEX_START(I),H_INDEX_END(I) - diagonal(K) = Hubbard_U(I)*dq_v(I) + Coulomb_Pot_dq_v(I) - enddo - enddo - !$OMP END PARALLEL DO - - call bml_set_diagonal(H1_bml,diagonal,threshold) - call bml_multiply(S_bml,H1_bml,D1_bml,1.0_dp,0.0_dp,threshold) - call bml_multiply(H1_bml,S_bml,D1_bml,0.5_dp,0.5_dp,threshold) - call bml_copy(D1_bml,H1_bml) - - call bml_transpose(Z_bml,X_bml) - call bml_multiply(X_bml,H1_bml,D1_bml,1.0_dp,0.0_dp,threshold) - call bml_multiply(D1_bml,Z_bml,H1_bml,1.0_dp,0.0_dp,threshold) - - call prg_implicit_fermi_first_order_response(HO_bml,H1_bml,DO_bml,D1_bml,Inv_bml, & - m_rec,mu0,beta,real(nocc,PREC),threshold) - - call bml_transpose(Z_bml,X_bml) - call bml_multiply(Z_bml,D1_bml,Y_bml,2.0_dp,0.0_dp,threshold) - call bml_multiply(Y_bml,X_bml,D1_bml,1.0_dp,0.0_dp,threshold) - - do It = 1, Nr_atoms - dq_dv(It) = 0 - do K = H_INDEX_START(It), H_INDEX_END(It) + enddo + !$OMP END PARALLEL DO + call Ewald_k_Space(Coulomb_Pot_k,Coulomb_Force_k,RX,RY,RZ,LBox,dq_v,Nr_atoms,Coulomb_acc, & + TIMERATIO,Max_Nr_Neigh) + Coulomb_Pot_dq_v = Coulomb_Pot_Real+Coulomb_Pot_k + + diagonal = 0.0_dp + do I = 1,HDIM + call bml_set_row(H1_bml,I,diagonal,threshold) + enddo + + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(I,K) + do I = 1,Nr_atoms + do K = H_INDEX_START(I),H_INDEX_END(I) + diagonal(K) = Hubbard_U(I)*dq_v(I) + Coulomb_Pot_dq_v(I) + enddo + enddo + !$OMP END PARALLEL DO + + call bml_set_diagonal(H1_bml,diagonal,threshold) + call bml_multiply(S_bml,H1_bml,D1_bml,1.0_dp,0.0_dp,threshold) + call bml_multiply(H1_bml,S_bml,D1_bml,0.5_dp,0.5_dp,threshold) + call bml_copy(D1_bml,H1_bml) + + call bml_transpose(Z_bml,X_bml) + call bml_multiply(X_bml,H1_bml,D1_bml,1.0_dp,0.0_dp,threshold) + call bml_multiply(D1_bml,Z_bml,H1_bml,1.0_dp,0.0_dp,threshold) + + call prg_implicit_fermi_first_order_response(HO_bml,H1_bml,DO_bml,D1_bml,Inv_bml, & + m_rec,mu0,beta,real(nocc,PREC),threshold) + + call bml_transpose(Z_bml,X_bml) + call bml_multiply(Z_bml,D1_bml,Y_bml,2.0_dp,0.0_dp,threshold) + call bml_multiply(Y_bml,X_bml,D1_bml,1.0_dp,0.0_dp,threshold) + + do It = 1, Nr_atoms + dq_dv(It) = 0 + do K = H_INDEX_START(It), H_INDEX_END(It) call bml_get_row(S_bml,K,row1) call bml_get_row(D1_bml,K,row2) dq_dv(It) = dq_dv(It) + dot_product(row1,row2) - enddo - JJ(It,J) = dq_dv(It) + enddo + JJ(It,J) = dq_dv(It) + enddo + dq_v = ZERO enddo - dq_v = ZERO - enddo - do I = 1,Nr_atoms - JJ(I,I) = JJ(I,I) - ONE - enddo - call Invert(JJ,KK,Nr_atoms) - deallocate(row1); deallocate(row2); deallocate(JJ) + do I = 1,Nr_atoms + JJ(I,I) = JJ(I,I) - ONE + enddo + call Invert(JJ,KK,Nr_atoms) + deallocate(row1); deallocate(row2); deallocate(JJ) + + end subroutine prg_full_kernel + + + ! Compute the low-rank kernel matrix. (For development code) + subroutine prg_kernel_matrix_multirank(KRes,KK0_bml,Res,FelTol,L,LMAX,HO_bml,mu,beta,RX,RY,RZ,LBox,Hubbard_U,Element_Type, & + Nr_atoms,HDIM,Max_Nr_Neigh,Coulomb_acc,TIMERATIO,nnRx,nnRy,nnRz,nrnnlist,nnType,H_INDEX_START,H_INDEX_END, & + S_bml,Z_bml,Nocc,Znuc,Inv_bml,H1_bml,X_bml,Y_bml,DO_bml,D1_bml,m_rec,threshold) + + !! Res = q[n] - n + !! KK0 is preconditioner + !! KRes rank-L approximation of (K0*J)^(-1)*K0*(q[n]-n) with (K0*J)^(-1) as in Eq. (41) in Ref. [*] + + implicit none + integer, intent(in) :: Nr_atoms, HDIM, Nocc,Max_Nr_Neigh,m_rec + real(dp), parameter :: ONE = 1.D0, TWO = 2.D0, ZERO = 0.D0 + real(dp), intent(in) :: Coulomb_acc, TIMERATIO, FelTol, beta, mu + real(dp), intent(in) :: RX(Nr_atoms), RY(Nr_atoms), RZ(Nr_atoms), LBox(3) + real(dp), intent(in) :: Res(Nr_atoms) + integer, intent(in) :: H_INDEX_START(Nr_atoms), H_INDEX_END(Nr_atoms) + real(dp), intent(in) :: Znuc(Nr_atoms), Hubbard_U(Nr_atoms) + type(bml_matrix_t), intent(in) :: HO_bml, S_bml, Z_bml, Inv_bml(m_rec) + real(dp) :: K0Res(Nr_atoms) + type(bml_matrix_t),intent(inout) :: H1_bml, X_bml,Y_bml,DO_bml,D1_bml + type(bml_matrix_t),intent(inout) :: KK0_bml + real(dp), intent(in) :: threshold + integer, intent(in) :: LMAX + character(10), intent(in) :: Element_Type(Nr_atoms) + integer, intent(in) :: nrnnlist(Nr_atoms), nnType(Nr_atoms,Max_Nr_Neigh) + real(dp), intent(in) :: nnRx(Nr_atoms,Max_Nr_Neigh) + real(dp), intent(in) :: nnRy(Nr_atoms,Max_Nr_Neigh), nnRz(Nr_atoms,Max_Nr_Neigh) + real(dp) :: Coulomb_Pot_Real(Nr_atoms), Coulomb_Pot(Nr_atoms) + real(dp) :: Coulomb_Pot_Real_I, Coulomb_Pot_k(Nr_atoms),dq_dv(Nr_atoms) + real(dp) :: Coulomb_Pot_Real_dq_v(Nr_atoms), Coulomb_Pot_dq_v(Nr_atoms) + real(dp) :: Coulomb_Force_Real_I(3),Coulomb_Force_k(3,Nr_atoms) + real(dp) :: dq_v(Nr_atoms) + real(dp), intent(out) :: KRes(Nr_atoms) + integer :: I,J,K,It,col,row + integer, intent(out) :: L + real(dp) :: Fel, proj_tmp,elem + real(dp) :: vi(Nr_atoms,LMAX), fi(Nr_atoms,LMAX), v(Nr_atoms) + real(dp) :: dr(Nr_Atoms), IdentRes(Nr_atoms) + real(dp), allocatable :: O(:,:),M(:,:),row1(:),row2(:) + + allocate(row1(HDIM));allocate(row2(HDIM)); + + dr = Res + I = 0 + Fel = 1.D0 + do while ((Fel > FelTol).AND.(I < (LMAX))) !! Fel = "Error" in Swedish + I = I + 1 + vi(:,I) = dr/norm2(dr) + do J = 1,I-1 + vi(:,I) = vi(:,I) - dot_product(vi(:,I),vi(:,J))*vi(:,J) !! Orthogonalized v_i as in Eq. (42) Ref. [*] + enddo + vi(:,I) = vi(:,I)/norm2(vi(:,I)) + v(:) = vi(:,I) ! v_i -end subroutine prg_full_kernel + dq_v = v + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(I,Coulomb_Pot_Real_I,Coulomb_Force_Real_I) + do J = 1,Nr_atoms + call Ewald_Real_Space(Coulomb_Pot_Real_I,Coulomb_Force_Real_I,J,RX,RY,RZ,LBox, & + dq_v,Hubbard_U,Element_Type,Nr_atoms,Coulomb_acc,TIMERATIO,nnRx,nnRy,nnRz,nrnnlist,nnType,HDIM,Max_Nr_Neigh) + Coulomb_Pot_Real(J) = Coulomb_Pot_Real_I + enddo + !$OMP END PARALLEL DO -! Compute the low-rank kernel matrix. (For development code) - subroutine prg_kernel_matrix_multirank(KRes,KK0_bml,Res,FelTol,L,LMAX,HO_bml,mu,beta,RX,RY,RZ,LBox,Hubbard_U,Element_Type, & - Nr_atoms,HDIM,Max_Nr_Neigh,Coulomb_acc,TIMERATIO,nnRx,nnRy,nnRz,nrnnlist,nnType,H_INDEX_START,H_INDEX_END, & - S_bml,Z_bml,Nocc,Znuc,Inv_bml,H1_bml,X_bml,Y_bml,DO_bml,D1_bml,m_rec,threshold) + call Ewald_k_Space(Coulomb_Pot_k,Coulomb_Force_k,RX,RY,RZ,LBox,dq_v,Nr_atoms,Coulomb_acc,TIMERATIO,Max_Nr_Neigh) + Coulomb_Pot_dq_v = Coulomb_Pot_Real+Coulomb_Pot_k -!! Res = q[n] - n -!! KK0 is preconditioner -!! KRes rank-L approximation of (K0*J)^(-1)*K0*(q[n]-n) with (K0*J)^(-1) as in Eq. (41) in Ref. [*] + row1 = 0.0_dp + do J = 1,HDIM + call bml_set_row(H1_bml,J,row1,threshold) + enddo - implicit none - integer, intent(in) :: Nr_atoms, HDIM, Nocc,Max_Nr_Neigh,m_rec - real(dp), parameter :: ONE = 1.D0, TWO = 2.D0, ZERO = 0.D0 - real(dp), intent(in) :: Coulomb_acc, TIMERATIO, FelTol, beta, mu - real(dp), intent(in) :: RX(Nr_atoms), RY(Nr_atoms), RZ(Nr_atoms), LBox(3) - real(dp), intent(in) :: Res(Nr_atoms) - integer, intent(in) :: H_INDEX_START(Nr_atoms), H_INDEX_END(Nr_atoms) - real(dp), intent(in) :: Znuc(Nr_atoms), Hubbard_U(Nr_atoms) - type(bml_matrix_t), intent(in) :: HO_bml, S_bml, Z_bml, Inv_bml(m_rec) - real(dp) :: K0Res(Nr_atoms) - type(bml_matrix_t),intent(inout) :: H1_bml, X_bml,Y_bml,DO_bml,D1_bml - type(bml_matrix_t),intent(inout) :: KK0_bml - real(dp), intent(in) :: threshold - integer, intent(in) :: LMAX - character(10), intent(in) :: Element_Type(Nr_atoms) - integer, intent(in) :: nrnnlist(Nr_atoms), nnType(Nr_atoms,Max_Nr_Neigh) - real(dp), intent(in) :: nnRx(Nr_atoms,Max_Nr_Neigh) - real(dp), intent(in) :: nnRy(Nr_atoms,Max_Nr_Neigh), nnRz(Nr_atoms,Max_Nr_Neigh) - real(dp) :: Coulomb_Pot_Real(Nr_atoms), Coulomb_Pot(Nr_atoms) - real(dp) :: Coulomb_Pot_Real_I, Coulomb_Pot_k(Nr_atoms),dq_dv(Nr_atoms) - real(dp) :: Coulomb_Pot_Real_dq_v(Nr_atoms), Coulomb_Pot_dq_v(Nr_atoms) - real(dp) :: Coulomb_Force_Real_I(3),Coulomb_Force_k(3,Nr_atoms) - real(dp) :: dq_v(Nr_atoms) - real(dp), intent(out) :: KRes(Nr_atoms) - integer :: I,J,K,It,col,row - integer, intent(out) :: L - real(dp) :: Fel, proj_tmp,elem - real(dp) :: vi(Nr_atoms,LMAX), fi(Nr_atoms,LMAX), v(Nr_atoms) - real(dp) :: dr(Nr_Atoms), IdentRes(Nr_atoms) - real(dp), allocatable :: O(:,:),M(:,:),row1(:),row2(:) - - allocate(row1(HDIM));allocate(row2(HDIM)); - - dr = Res - I = 0 - Fel = 1.D0 - do while ((Fel > FelTol).AND.(I < (LMAX))) !! Fel = "Error" in Swedish - I = I + 1 - vi(:,I) = dr/norm2(dr) - do J = 1,I-1 - vi(:,I) = vi(:,I) - dot_product(vi(:,I),vi(:,J))*vi(:,J) !! Orthogonalized v_i as in Eq. (42) Ref. [*] - enddo - vi(:,I) = vi(:,I)/norm2(vi(:,I)) - v(:) = vi(:,I) ! v_i + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(J,K) + do J = 1,Nr_atoms + do K = H_INDEX_START(J),H_INDEX_END(J) + row1(K) = Hubbard_U(J)*dq_v(J) + Coulomb_Pot_dq_v(J) + enddo + enddo + !$OMP END PARALLEL DO - dq_v = v + call bml_set_diagonal(H1_bml,row1,threshold) + call bml_multiply(S_bml,H1_bml,D1_bml,1.0_dp,0.0_dp,threshold) + call bml_multiply(H1_bml,S_bml,D1_bml,0.5_dp,0.5_dp,threshold) + call bml_copy(D1_bml,H1_bml) - !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(I,Coulomb_Pot_Real_I,Coulomb_Force_Real_I) - do J = 1,Nr_atoms - call Ewald_Real_Space(Coulomb_Pot_Real_I,Coulomb_Force_Real_I,J,RX,RY,RZ,LBox, & - dq_v,Hubbard_U,Element_Type,Nr_atoms,Coulomb_acc,TIMERATIO,nnRx,nnRy,nnRz,nrnnlist,nnType,HDIM,Max_Nr_Neigh) - Coulomb_Pot_Real(J) = Coulomb_Pot_Real_I - enddo - !$OMP END PARALLEL DO - - call Ewald_k_Space(Coulomb_Pot_k,Coulomb_Force_k,RX,RY,RZ,LBox,dq_v,Nr_atoms,Coulomb_acc,TIMERATIO,Max_Nr_Neigh) - Coulomb_Pot_dq_v = Coulomb_Pot_Real+Coulomb_Pot_k - - row1 = 0.0_dp - do J = 1,HDIM - call bml_set_row(H1_bml,J,row1,threshold) - enddo + call bml_transpose(Z_bml,X_bml) + call bml_multiply(X_bml,H1_bml,D1_bml,1.0_dp,0.0_dp,threshold) + call bml_multiply(D1_bml,Z_bml,H1_bml,1.0_dp,0.0_dp,threshold) - !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(J,K) - do J = 1,Nr_atoms - do K = H_INDEX_START(J),H_INDEX_END(J) - row1(K) = Hubbard_U(J)*dq_v(J) + Coulomb_Pot_dq_v(J) - enddo - enddo - !$OMP END PARALLEL DO - - call bml_set_diagonal(H1_bml,row1,threshold) - call bml_multiply(S_bml,H1_bml,D1_bml,1.0_dp,0.0_dp,threshold) - call bml_multiply(H1_bml,S_bml,D1_bml,0.5_dp,0.5_dp,threshold) - call bml_copy(D1_bml,H1_bml) - - call bml_transpose(Z_bml,X_bml) - call bml_multiply(X_bml,H1_bml,D1_bml,1.0_dp,0.0_dp,threshold) - call bml_multiply(D1_bml,Z_bml,H1_bml,1.0_dp,0.0_dp,threshold) - - call prg_implicit_fermi_first_order_response(HO_bml,H1_bml,DO_bml,D1_bml,Inv_bml, & - m_rec, mu, beta, real(nocc,dp), threshold) - - call bml_transpose(Z_bml,X_bml) - call bml_multiply(Z_bml,D1_bml,Y_bml,2.0_dp,0.0_dp,threshold) - call bml_multiply(Y_bml,X_bml,D1_bml,1.0_dp,0.0_dp,threshold) - - !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(It,K,row1,row2) - do It = 1, Nr_atoms - dq_dv(It) = 0 - do K = H_INDEX_START(It), H_INDEX_END(It) - call bml_get_row(S_bml,K,row1) - call bml_get_row(D1_bml,K,row2) - dq_dv(It) = dq_dv(It) + dot_product(row1,row2) + call prg_implicit_fermi_first_order_response(HO_bml,H1_bml,DO_bml,D1_bml,Inv_bml, & + m_rec, mu, beta, real(nocc,dp), threshold) + + call bml_transpose(Z_bml,X_bml) + call bml_multiply(Z_bml,D1_bml,Y_bml,2.0_dp,0.0_dp,threshold) + call bml_multiply(Y_bml,X_bml,D1_bml,1.0_dp,0.0_dp,threshold) + + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(It,K,row1,row2) + do It = 1, Nr_atoms + dq_dv(It) = 0 + do K = H_INDEX_START(It), H_INDEX_END(It) + call bml_get_row(S_bml,K,row1) + call bml_get_row(D1_bml,K,row2) + dq_dv(It) = dq_dv(It) + dot_product(row1,row2) + enddo enddo - enddo - !$OMP END PARALLEL DO + !$OMP END PARALLEL DO - dr = dq_dv - v - fi(:,I) = dr + dr = dq_dv - v + fi(:,I) = dr - L = I - allocate(O(L,L), M(L,L)) - do K = 1,L - do J = 1,L - O(K,J) = dot_product(fi(:,K),fi(:,J)) ! O_KJ = < fv_i(K) | fv_i(J) > see below Eq. (31) - enddo - enddo - call Invert(O,M,L) ! M = O^(-1) - IdentRes = 0.D0*Res - KRes = 0.D0 - do K = 1,L - do J = 1,L - proj_tmp = M(K,J)*dot_product(fi(:,J),Res) - IdentRes = IdentRes + proj_tmp*fi(:,K) - KRes = KRes + proj_tmp*vi(:,K) !! KRes becomes the rank-L approximate of (J)^(-1)*(q[n]-n) - enddo + L = I + allocate(O(L,L), M(L,L)) + do K = 1,L + do J = 1,L + O(K,J) = dot_product(fi(:,K),fi(:,J)) ! O_KJ = < fv_i(K) | fv_i(J) > see below Eq. (31) + enddo + enddo + call Invert(O,M,L) ! M = O^(-1) + IdentRes = 0.D0*Res + KRes = 0.D0 + do K = 1,L + do J = 1,L + proj_tmp = M(K,J)*dot_product(fi(:,J),Res) + IdentRes = IdentRes + proj_tmp*fi(:,K) + KRes = KRes + proj_tmp*vi(:,K) !! KRes becomes the rank-L approximate of (J)^(-1)*(q[n]-n) + enddo + enddo + Fel = norm2(IdentRes-Res)/norm2(IdentRes) !! RELATIVE RESIDUAL ERROR ESTIMATE Eq. (48) Ref. [*] + write(*,*) '# I, L, Fel = ',I,L,Fel !! Fel goes down with L + + ! Does not work, need to normalize matrix + if ((Fel > FelTol).AND.(I < (LMAX))) then + deallocate(O, M) + else + do row = 1,Nr_atoms + do col = 1,Nr_atoms + elem = 0.0 + do K = 1,L + do J = 1,L + elem = elem + M(J,K)*vi(row,J)*fi(col,K) + enddo + enddo + if (abs(elem) > threshold) then + call bml_set_element(KK0_bml,row,col,elem) + endif + enddo + enddo + deallocate(O,M) + endif enddo - Fel = norm2(IdentRes-Res)/norm2(IdentRes) !! RELATIVE RESIDUAL ERROR ESTIMATE Eq. (48) Ref. [*] - write(*,*) '# I, L, Fel = ',I,L,Fel !! Fel goes down with L - - ! Does not work, need to normalize matrix - if ((Fel > FelTol).AND.(I < (LMAX))) then - deallocate(O, M) - else - do row = 1,Nr_atoms - do col = 1,Nr_atoms - elem = 0.0 - do K = 1,L - do J = 1,L - elem = elem + M(J,K)*vi(row,J)*fi(col,K) - enddo - enddo - if (abs(elem) > threshold) then - call bml_set_element(KK0_bml,row,col,elem) - endif - enddo - enddo - deallocate(O,M) - endif - enddo deallocate(row1); deallocate(row2); end subroutine prg_kernel_matrix_multirank diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index cbc484e7..f2d8d5df 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -56,6 +56,7 @@ progress_test(prg_density_T) progress_test(prg_density_T_fermi) progress_test(prg_sp2_basic) progress_test(prg_implicit_fermi) +progress_test(prg_implicit_fermi_save_inverse) progress_test(prg_sp2_alg1_dense) progress_test(prg_sp2_alg2_dense) progress_test(prg_sp2_alg1_ellpack) diff --git a/tests/src/main.F90 b/tests/src/main.F90 index cd2fe3d2..87285c05 100644 --- a/tests/src/main.F90 +++ b/tests/src/main.F90 @@ -31,7 +31,8 @@ program main implicit none - integer :: norb, mdim, verbose, i + integer :: norb, mdim, verbose + type(bml_matrix_t) :: inv_bml(10) type(bml_matrix_t) :: ham_bml type(bml_matrix_t) :: rho_bml, rho1_bml type(bml_matrix_t) :: rho_ortho_bml @@ -59,7 +60,7 @@ program main real(dp) :: mineval, maxeval, occerrlimit real(dp), allocatable :: gbnd(:) integer :: minsp2iter, icount, nodesPerPart, occsteps - integer :: norecs + integer :: norecs,occiter,i integer :: maxsp2iter, npts, sp2all_timer, sp2all_timer_init integer, allocatable :: pp(:), signlist(:) real(dp), allocatable :: vv(:) @@ -189,6 +190,8 @@ program main mu = 0.2_dp beta = 4.0_dp !nocc,osteps,occerrlimit + norecs = 10 + bml_type = 'ellpack' call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,rho_bml) call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,ham_bml) @@ -196,7 +199,7 @@ program main call bml_read_matrix(ham_bml,'hamiltonian_ortho.mtx') call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,rho1_bml) - call prg_implicit_fermi(ham_bml, rho1_bml, 10, 2, 10.0_dp, mu, beta, 0, 1, 1.0_dp, threshold, 10e-8_dp) + call prg_implicit_fermi(ham_bml, rho1_bml, norecs, 2, 10.0_dp, mu, beta, 0, 1, 1.0_dp, threshold, 1e-6_dp) mu = 0.2_dp @@ -205,11 +208,44 @@ program main call bml_add(rho1_bml,rho_bml,1.0_dp,-1.0_dp,threshold) error_calc = bml_fnorm(rho1_bml) + write(*,*) error_calc if(error_calc.gt.0.1_dp)then write(*,*) "Error in Implicit Fermi expansion ","Error = ",error_calc error stop endif + case("prg_implicit_fermi_save_inverse") + + mu = 0.2_dp + beta = 4.0_dp !nocc,osteps,occerrlimit + norecs = 10 + nocc = 10.0_dp + + do i = 1,norecs + call bml_identity_matrix(bml_type,bml_element_real,dp,norb,norb,inv_bml(i)) + enddo + + call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,rho_bml) + call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,ham_bml) + + call bml_read_matrix(ham_bml,'hamiltonian_ortho.mtx') + + call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,rho1_bml) + call prg_test_density_matrix(ham_bml,rho1_bml,beta,mu,nocc,1,1e-4_dp,threshold) + write(*,*) mu + mu = 0.2_dp + call prg_implicit_fermi_save_inverse(inv_bml,ham_bml,rho_bml,norecs,nocc,mu,beta,1e-4_dp, threshold, 1e-5_dp, 1,occiter) + + write(*,*) mu + call bml_scale(0.5_dp,rho_bml) + call bml_add(rho1_bml,rho_bml,1.0_dp,-1.0_dp,threshold) + + error_calc = bml_fnorm(rho1_bml) + if(error_calc.gt.0.1_dp)then + write(*,*) "Error in Implicit Fermi expansion save inverse","Error = ",error_calc + error stop + endif + case("prg_sp2_basic") !Sp2 original version call prg_timer_start(loop_timer) @@ -979,6 +1015,7 @@ program main error stop endif + call prg_timer_stop(loop_timer) case("prg_buildzsparse") ! Building inverse overlap factor matrix (Lowdin method) @@ -1005,7 +1042,6 @@ program main call prg_buildZsparse(over_bml,aux_bml,1,mdim,bml_type,zk1_bml,zk2_bml,zk3_bml& &,zk4_bml,zk5_bml,zk6_bml,4,4,3,threshold,threshold,.true.,1) - call bml_add_deprecated(-1.0_dp,aux_bml,1.0_dp,zmat_bml,0.0_dp) error_calc = bml_fnorm(aux_bml) @@ -1095,6 +1131,7 @@ program main call bml_add_deprecated(-1.0_dp,rho_bml,1.0_dp,rho1_bml,0.0_dp) error_calc = bml_fnorm(rho_bml) write(*,*)error_calc + if(error_calc.gt.error_tol)then write(*,*) "Error is too high", error_calc error stop @@ -1135,8 +1172,6 @@ program main error stop endif - - case default write(*,*)"ERROR: unknown test ",test From da4eb81694380a9ab30e94739a8e79bdfd9c3049 Mon Sep 17 00:00:00 2001 From: Linnea Andersson Date: Fri, 16 Jul 2021 17:51:18 +0200 Subject: [PATCH 07/10] Added kernel routine --- src/CMakeLists.txt | 1 + src/prg_ewald_mod.F90 | 242 +++++++++++++++++++++++++++++++-- src/prg_implicit_fermi_mod.F90 | 125 +++++++++++------ src/prg_xlbokernel_mod.F90 | 73 ++++++---- 4 files changed, 363 insertions(+), 78 deletions(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 9c30e5be..3ec8ee57 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -96,6 +96,7 @@ install(FILES ${CMAKE_CURRENT_BINARY_DIR}/prg_chebyshev_mod.mod ${CMAKE_CURRENT_BINARY_DIR}/prg_densitymatrix_mod.mod ${CMAKE_CURRENT_BINARY_DIR}/prg_dos_mod.mod + ${CMAKE_CURRENT_BINARY_DIR}/prg_ewald_mod.mod ${CMAKE_CURRENT_BINARY_DIR}/prg_extras_mod.mod ${CMAKE_CURRENT_BINARY_DIR}/prg_genz_mod.mod ${CMAKE_CURRENT_BINARY_DIR}/prg_graph_mod.mod diff --git a/src/prg_ewald_mod.F90 b/src/prg_ewald_mod.F90 index 95127797..5539c12d 100644 --- a/src/prg_ewald_mod.F90 +++ b/src/prg_ewald_mod.F90 @@ -15,8 +15,10 @@ module prg_ewald_mod public :: Ewald_Real_Space_Single_latte public :: Ewald_Real_Space public :: Ewald_Real_Space_latte - public :: Ewald_k_space + public :: Ewald_Real_Space_Test + public :: Ewald_k_space_latte_single public :: Ewald_k_space_latte + public :: Ewald_k_space_Test contains @@ -374,6 +376,123 @@ subroutine Ewald_Real_Space_latte(COULOMBV,I,RXYZ,Box, & end subroutine Ewald_Real_Space_latte + subroutine Ewald_Real_Space_Test(COULOMBV,I,RX,RY,RZ,LBox, & + DELTAQ,U,Element_Type,Nr_atoms,COULACC,nnRx,nnRy,nnRz,nrnnlist,nnType,Max_Nr_Neigh) + + implicit none + + integer, parameter :: PREC = 8 + real(PREC), parameter :: ONE = 1.D0, TWO = 2.D0, ZERO = 0.D0, SIX = 6.D0, THREE = 3.D0 + real(PREC), parameter :: FOURTYEIGHT = 48.D0, ELEVEN = 11.D0, SIXTEEN = 16.D0 + real(PREC), parameter :: pi = 3.14159265358979323846264D0 + real(PREC), parameter :: SQRTPI = 1.772453850905516D0 + integer, intent(in) :: Nr_atoms, Max_Nr_Neigh, I + real(PREC), intent(in) :: COULACC + real(PREC) :: TFACT, RELPERM, KECONST + real(PREC), intent(in) :: RX(Nr_atoms), RY(Nr_atoms), RZ(Nr_atoms), LBox(3), DELTAQ(Nr_atoms) + real(PREC), intent(in) :: U(Nr_atoms) + real(PREC) :: COULCUT, COULCUT2 + character(10), intent(in) :: Element_Type(Nr_atoms) + integer, intent(in) :: nrnnlist(Nr_atoms), nnType(Nr_atoms,Max_Nr_Neigh) + real(PREC), intent(in) :: nnRx(Nr_atoms,Max_Nr_Neigh) + real(PREC), intent(in) :: nnRy(Nr_atoms,Max_Nr_Neigh), nnRz(Nr_atoms,Max_Nr_Neigh) + real(PREC), intent(out) :: COULOMBV + real(PREC) :: Ra(3), Rb(3), dR, Rab(3) + real(PREC) :: COULVOL, SQRTX, CALPHA, DC(3), MAGR, MAGR2, Z + real(PREC) :: CALPHA2, TI,TI2,TI3,TI4,TI6,SSA,SSB,SSC,SSD,SSE + real(PREC) :: NUMREP_ERFC, CA, FORCE, EXPTI, EXPTJ + real(PREC) :: TJ,TJ2,TJ3,TJ4,TJ6,TI2MTJ2A,SA,SB,SC,SD,SE,SF + real(PREC) :: TI2MTJ2, TI2MTI2, TJ2MTI2 + integer :: J,K, ccnt, nnI + + COULVOL = LBox(1)*LBox(2)*LBox(3) + SQRTX = sqrt(-log(COULACC)) + + ccnt = 0 + COULCUT = 12.D0 + CALPHA = SQRTX/COULCUT + COULCUT2 = COULCUT*COULCUT + CALPHA2 = CALPHA*CALPHA + + RELPERM = ONE + KECONST = 14.3996437701414D0*RELPERM + TFACT = 16.0D0/(5.0D0*KECONST) + + COULOMBV = ZERO + + TI = TFACT*U(I) + TI2 = TI*TI + TI3 = TI2*TI + TI4 = TI2*TI2 + TI6 = TI4*TI2 + + SSA = TI + SSB = TI3/48.D0 + SSC = 3.D0*TI2/16.D0 + SSD = 11.D0*TI/16.D0 + SSE = 1.D0 + + Ra(1) = RX(I) + Ra(2) = RY(I) + Ra(3) = RZ(I) + +! !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(nnI,J,Rb,Rab,dR,MAGR,MAGR2,TJ,DC,Z,NUMREP_ERFC,CA) & +! !$OMP REDUCTION(+:COULOMBV) + do nnI = 1,nrnnlist(I) + Rb(1) = nnRx(I,nnI) + Rb(2) = nnRy(I,nnI) + Rb(3) = nnRz(I,nnI) + J = nnType(I,nnI) + Rab = Rb-Ra ! OBS b - a !!! + dR = norm2(Rab) + MAGR = dR + MAGR2 = dR*dR + + if ((dR <= COULCUT).and.(dR > 1e-12)) then + + TJ = TFACT*U(J) + DC = Rab/dR + + ! Not Using Numerical Recipes ERFC + Z = abs(CALPHA*MAGR) + NUMREP_ERFC = erfc(Z) + + CA = NUMREP_ERFC/MAGR + COULOMBV = COULOMBV + DELTAQ(J)*CA + ccnt = ccnt + 1 + !TEST(ccnt) = DELTAQ(J)*CA + CA = CA + TWO*CALPHA*exp( -CALPHA2*MAGR2 )/SQRTPI + EXPTI = exp(-TI*MAGR ) + + if (Element_Type(I).eq.Element_Type(J)) then + COULOMBV = COULOMBV - DELTAQ(J)*EXPTI*(SSB*MAGR2 + SSC*MAGR + SSD + SSE/MAGR) + ccnt = ccnt + 1 + !TEST(ccnt) = - DELTAQ(J)*EXPTI*(SSB*MAGR2 + SSC*MAGR + SSD + SSE/MAGR) + else + TJ2 = TJ*TJ + TJ3 = TJ2*TJ + TJ4 = TJ2*TJ2 + TJ6 = TJ4*TJ2 + EXPTJ = exp( -TJ*MAGR ) + TI2MTJ2 = TI2 - TJ2 + TJ2MTI2 = -TI2MTJ2 + SA = TI + SB = TJ4*TI/(TWO*TI2MTJ2*TI2MTJ2) + SC = (TJ6 - THREE*TJ4*TI2)/(TI2MTJ2*TI2MTJ2*TI2MTJ2) + SD = TJ + SE = TI4*TJ/(TWO * TJ2MTI2 * TJ2MTI2) + SF = (TI6 - THREE*TI4*TJ2)/(TJ2MTI2*TJ2MTI2*TJ2MTI2) + + COULOMBV = COULOMBV - (DELTAQ(J)*(EXPTI*(SB - (SC/MAGR)) + EXPTJ*(SE - (SF/MAGR)))) + endif + + endif + enddo +! !$OMP END PARALLEL DO + COULOMBV = KECONST*COULOMBV + + end subroutine Ewald_Real_Space_Test + subroutine Ewald_Real_Space(COULOMBV,FCOUL,I,RX,RY,RZ,LBox, & DELTAQ,U,Element_Type,Nr_atoms,COULACC,TIMERATIO,nnRx,nnRy,nnRz,nrnnlist,nnType,HDIM,Max_Nr_Neigh) @@ -623,21 +742,119 @@ subroutine Ewald_k_Space_latte(COULOMBV,RXYZ,Box,DELTAQ,Nr_atoms,COULACC,Max_Nr_ end subroutine Ewald_k_Space_latte - subroutine Ewald_k_Space(COULOMBV,FCOUL,RX,RY,RZ,LBox,DELTAQ,Nr_atoms,COULACC,TIMERATIO,Max_Nr_Neigh) + subroutine Ewald_k_Space_latte_single(COULOMBV,J,RXYZ,Box,DELTAQ,Nr_atoms,COULACC) implicit none + integer, parameter :: PREC = 8 + real(PREC), parameter :: ONE = 1.D0, TWO = 2.D0, ZERO = 0.D0, SIX = 6.D0, THREE = 3.D0, FOUR = 4.D0 + real(PREC), parameter :: FOURTYEIGHT = 48.D0, ELEVEN = 11.D0, SIXTEEN = 16.D0, EIGHT = 8.D0 + real(PREC), parameter :: pi = 3.14159265358979323846264D0 + real(PREC), parameter :: SQRTPI = 1.772453850905516D0 + integer, intent(in) :: Nr_atoms, J + real(PREC), intent(in) :: COULACC + real(PREC) :: KECONST, TFACT, RELPERM + real(PREC), intent(in) :: RXYZ(3,Nr_atoms), Box(3,3), DELTAQ(Nr_atoms) + real(PREC) :: COULCUT, COULCUT2 + real(PREC), intent(out) :: COULOMBV(Nr_atoms) + real(PREC) :: Ra(3), Rb(3), dR, Rab(3) + real(PREC) :: COULVOL, SQRTX, CALPHA, DC(3), MAGR, MAGR2, Z + real(PREC) :: CALPHA2 + real(PREC) :: CORRFACT,FOURCALPHA2 + real(PREC) :: RECIPVECS(3,3) + real(PREC) :: K(3),L11,L12,L13,M21,M22,M23,K2,KCUTOFF,KCUTOFF2,PREFACTOR + real(PREC) :: IDOT, JDOT, COSJDOT, SINJDOT, KEPREF + + integer :: I,L,M,N, ccnt, nnI, LMAX,MMAX,NMAX,NMIN,MMIN + + COULVOL = Box(1,1)*Box(2,2)*Box(3,3) + SQRTX = sqrt(-log(COULACC)) + + ccnt = 0 + + COULCUT = 12.0D0 + CALPHA = SQRTX/COULCUT + + COULCUT2 = COULCUT*COULCUT + KCUTOFF = TWO*CALPHA*SQRTX + KCUTOFF2 = KCUTOFF*KCUTOFF + CALPHA2 = CALPHA*CALPHA + FOURCALPHA2 = FOUR*CALPHA2 + + RECIPVECS = ZERO + RECIPVECS(1,1) = TWO*pi/Box(1,1) + RECIPVECS(2,2) = TWO*pi/Box(2,2) + RECIPVECS(3,3) = TWO*pi/Box(3,3) + LMAX = floor(KCUTOFF / sqrt(RECIPVECS(1,1)*RECIPVECS(1,1))) + MMAX = floor(KCUTOFF / sqrt(RECIPVECS(2,2)*RECIPVECS(2,2))) + NMAX = floor(KCUTOFF / sqrt(RECIPVECS(3,3)*RECIPVECS(3,3))) + + RELPERM = 1.D0 + KECONST = 14.3996437701414D0*RELPERM + + COULOMBV = ZERO + + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(I,IDOT,JDOT,COSJDOT,SINJDOT,L,M,N,MMIN,MMAX,NMIN,NMAX,L11,M22,K,K2,PREFACTOR,KEPREF) + do L = 0,LMAX + + if (L.eq.0) then + MMIN = 0 + else + MMIN = -MMAX + endif + + L11 = L*RECIPVECS(1,1) + + do M = MMIN,MMAX + + NMIN = -NMAX + if ((L==0).and.(M==0)) then + NMIN = 1 + endif + + M22 = M*RECIPVECS(2,2) + + do N = NMIN,NMAX + K(1) = L11 + K(2) = M22 + K(3) = N*RECIPVECS(3,3) + K2 = K(1)*K(1) + K(2)*K(2) + K(3)*K(3) + + PREFACTOR = EIGHT*pi*exp(-K2/(4.D0*CALPHA2))/(COULVOL*K2) + KEPREF = KECONST*PREFACTOR + JDOT = K(1)*RXYZ(1,J) + K(2)*RXYZ(2,J) + K(3)*RXYZ(3,J) + SINJDOT = sin(JDOT) + COSJDOT = cos(JDOT) + do I = 1,Nr_atoms + IDOT = K(1)*RXYZ(1,I) + K(2)*RXYZ(2,I) + K(3)*RXYZ(3,I) + COULOMBV(I) = COULOMBV(I) + KEPREF*DELTAQ(J)*(COSJDOT*cos(IDOT)+SINJDOT*sin(IDOT)) + enddo + enddo + enddo + enddo + !$OMP END PARALLEL DO + + ! Point self energy + CORRFACT = 2.D0*KECONST*CALPHA/SQRTPI; + COULOMBV = COULOMBV - CORRFACT*DELTAQ; + + end subroutine Ewald_k_Space_latte_single + + subroutine Ewald_k_Space_Test(COULOMBV,RX,RY,RZ,LBox,DELTAQ,Nr_atoms,COULACC,Max_Nr_Neigh) +! + implicit none +! integer, parameter :: PREC = 8 real(PREC), parameter :: ONE = 1.D0, TWO = 2.D0, ZERO = 0.D0, SIX = 6.D0, THREE = 3.D0, FOUR = 4.D0 real(PREC), parameter :: FOURTYEIGHT = 48.D0, ELEVEN = 11.D0, SIXTEEN = 16.D0, EIGHT = 8.D0 real(PREC), parameter :: pi = 3.14159265358979323846264D0 real(PREC), parameter :: SQRTPI = 1.772453850905516D0 integer, intent(in) :: Nr_atoms, Max_Nr_Neigh - real(PREC), intent(in) :: COULACC, TIMERATIO + real(PREC), intent(in) :: COULACC real(PREC) :: KECONST, TFACT, RELPERM real(PREC), intent(in) :: RX(Nr_atoms), RY(Nr_atoms), RZ(Nr_atoms), LBox(3), DELTAQ(Nr_atoms) real(PREC) :: COULCUT, COULCUT2 - real(PREC), intent(out) :: COULOMBV(Nr_atoms), FCOUL(3,Nr_atoms) + real(PREC), intent(out) :: COULOMBV(Nr_atoms) real(PREC) :: Ra(3), Rb(3), dR, Rab(3) real(PREC) :: COULVOL, SQRTX, CALPHA, DC(3), MAGR, MAGR2, Z real(PREC) :: CALPHA2, TI,TI2,TI3,TI4,TI6,SSA,SSB,SSC,SSD,SSE @@ -647,7 +864,7 @@ subroutine Ewald_k_Space(COULOMBV,FCOUL,RX,RY,RZ,LBox,DELTAQ,Nr_atoms,COULACC,TI real(PREC) :: PREVIR, COSSUM,SINSUM,DOT, KEPREF, COSSUM2, SINSUM2 integer :: I,J,L,M,N, ccnt, nnI, LMAX,MMAX,NMAX,NMIN,MMIN - +! COULVOL = LBox(1)*LBox(2)*LBox(3) SQRTX = sqrt(-log(COULACC)) @@ -673,7 +890,6 @@ subroutine Ewald_k_Space(COULOMBV,FCOUL,RX,RY,RZ,LBox,DELTAQ,Nr_atoms,COULACC,TI RELPERM = 1.D0 KECONST = 14.3996437701414D0*RELPERM - FCOUL = ZERO COULOMBV = ZERO SINLIST = ZERO COSLIST = ZERO @@ -714,6 +930,9 @@ subroutine Ewald_k_Space(COULOMBV,FCOUL,RX,RY,RZ,LBox,DELTAQ,Nr_atoms,COULACC,TI SINSUM = 0.D0 ! Doing the sin and cos sums + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(I,DOT) & + !$OMP REDUCTION(+:COSSUM) & + !$OMP REDUCTION(+:SINSUM) do I = 1,Nr_atoms DOT = K(1)*RX(I) + K(2)*RY(I) + K(3)*RZ(I) ! We re-use these in the next loop... @@ -722,19 +941,18 @@ subroutine Ewald_k_Space(COULOMBV,FCOUL,RX,RY,RZ,LBox,DELTAQ,Nr_atoms,COULACC,TI COSSUM = COSSUM + DELTAQ(I)*COSLIST(I) SINSUM = SINSUM + DELTAQ(I)*SINLIST(I) enddo + !$OMP END PARALLEL DO COSSUM2 = COSSUM*COSSUM SINSUM2 = SINSUM*SINSUM ! Add up energy and force contributions - +! KEPREF = KECONST*PREFACTOR + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(I) do I = 1,Nr_atoms COULOMBV(I) = COULOMBV(I) + KEPREF*(COSLIST(I)*COSSUM + SINLIST(I)*SINSUM) - FORCE = KEPREF*DELTAQ(I)*(SINLIST(I)*COSSUM - COSLIST(I)*SINSUM) - FCOUL(1,I) = FCOUL(1,I) + FORCE*K(1) - FCOUL(2,I) = FCOUL(2,I) + FORCE*K(2) - FCOUL(3,I) = FCOUL(3,I) + FORCE*K(3) enddo + !$OMP END PARALLEL DO KEPREF = KEPREF*(COSSUM2 + SINSUM2) endif @@ -746,6 +964,6 @@ subroutine Ewald_k_Space(COULOMBV,FCOUL,RX,RY,RZ,LBox,DELTAQ,Nr_atoms,COULACC,TI CORRFACT = 2.D0*KECONST*CALPHA/SQRTPI; COULOMBV = COULOMBV - CORRFACT*DELTAQ; - end subroutine Ewald_k_Space + end subroutine Ewald_k_Space_Test end module prg_ewald_mod diff --git a/src/prg_implicit_fermi_mod.F90 b/src/prg_implicit_fermi_mod.F90 index 76bd2bc2..37d03618 100644 --- a/src/prg_implicit_fermi_mod.F90 +++ b/src/prg_implicit_fermi_mod.F90 @@ -58,11 +58,11 @@ subroutine prg_implicit_fermi_save_inverse(Inv_bml, h_bml, p_bml, nsteps, nocc, integer, intent(inout) :: occiter type(bml_matrix_t) :: w_bml, y_bml, d_bml, aux_bml, p2_bml, I_bml, ai_bml - real(dp) :: trdPdmu, trP0, occErr, alpha - real(dp) :: cnst, ofactor, mustep + real(dp) :: trdPdmu, trP0, occErr, alpha, newerr + real(dp) :: cnst, ofactor, mustep, preverr real(dp), allocatable :: trace(:), gbnd(:) character(20) :: bml_type - integer :: N, M, i, iter, muadj, prev + integer :: N, M, i, iter, muadj, prev, maxiter bml_type = bml_get_type(h_bml) N = bml_get_N(h_bml) @@ -78,9 +78,12 @@ subroutine prg_implicit_fermi_save_inverse(Inv_bml, h_bml, p_bml, nsteps, nocc, call bml_identity_matrix(bml_type, bml_element_real, dp, N, M, ai_bml) occErr = 10.0_dp - alpha = 1.0_dp + newerr = 1000_dp + preverr = 1000_dp + alpha = 8.0_dp prev = 0 iter = 0 + maxiter = 30 cnst = beta/(1.0_dp*2**(nsteps+2)) if (SCF_IT .eq. 1) then @@ -95,12 +98,15 @@ subroutine prg_implicit_fermi_save_inverse(Inv_bml, h_bml, p_bml, nsteps, nocc, call bml_add(y_bml, p_bml, 1.0_dp, -1.0_dp, threshold) call bml_scale_add_identity(y_bml, 2.0_dp, 1.0_dp, threshold) call prg_conjgrad(y_bml, ai_bml, I_bml, aux_bml, d_bml, w_bml, 0.0001_dp, threshold) + do i = 1, nsteps + call bml_copy(I_bml, Inv_bml(i)) + enddo else ! Otherwise use previous inverse as starting guess call bml_copy(Inv_bml(1),ai_bml) end if - do while ((occErr .gt. occErrLimit .or. muadj .eq. 1) .and. iter < 50) + do while ((occErr .gt. occErrLimit .or. muadj .eq. 1) .and. iter < maxiter) iter = iter + 1 muadj = 0 write(*,*) 'mu =', mu @@ -116,46 +122,65 @@ subroutine prg_implicit_fermi_save_inverse(Inv_bml, h_bml, p_bml, nsteps, nocc, call bml_add(y_bml, p_bml, 1.0_dp, -1.0_dp, threshold) call bml_scale_add_identity(y_bml, 2.0_dp, 1.0_dp, threshold) ! Find inverse ai = (2*(P2-P)+I)^-1 - !call prg_conjgrad(y_bml, ai_bml, I_bml, aux_bml, d_bml, w_bml, 0.01_dp, threshold) - call prg_newtonschulz(y_bml, ai_bml, d_bml, w_bml, aux_bml, I_bml, tol, threshold) - call bml_multiply(ai_bml, p2_bml, p_bml, 1.0_dp, 0.0_dp, threshold) - call bml_copy(ai_bml, Inv_bml(i)) ! Save inverses for use in perturbation response calculation + !call prg_conjgrad(y_bml, Inv_bml(i), I_bml, aux_bml, d_bml, w_bml, tol, threshold) + !call bml_copy(Inv_bml(i),ai_bml) + call prg_newtonschulz(y_bml, Inv_bml(i), d_bml, w_bml, aux_bml, I_bml, tol, threshold) + call bml_multiply(Inv_bml(i), p2_bml, p_bml, 1.0_dp, 0.0_dp, threshold) + !call bml_copy(ai_bml, Inv_bml(i)) ! Save inverses for use in perturbation response calculation enddo - trdPdmu = bml_trace(p_bml) - trP0 = trdPdmu - trdPdmu = trdPdmu - bml_sum_squares(p_bml) ! sum p(i,j)**2 - trdPdmu = beta * trdPdmu + trP0 = bml_trace(p_bml) + trdPdmu = beta*(trP0 - bml_sum_squares(p_bml)) ! sum p(i,j)**2 occErr = abs(trP0 - nocc) - write(*,*) 'occerr =', nocc-trP0 - + write(*,*) 'occerr =', occErr + ! If occupation error is too large, do bisection method - if (occerr > 10.0_dp) then - if (nocc-trP0 < 0.0_dp) then - if (prev .eq. 1) then - alpha = alpha/2 - endif + if (occerr > 1.0_dp) then + ! if (newerr > occerr) then + if (nocc-trP0 < 0.0_dp .and. prev .eq. -1) then prev = -1 - mu = mu - alpha - else - if (prev .eq. -1) then - alpha = alpha/2 - endif + else if (nocc-trP0 > 0.0_dp .and. prev .eq. 1) then prev = 1 - mu = mu + alpha + else if (nocc-trP0 > 0.0_dp .and. prev .eq. -1) then + prev = 1 + alpha = alpha/2 + ! newerr = abs(trP0+prev*alpha*trdPdmu-nocc) + ! do while(newerr > occerr) + ! alpha = alpha/2 + ! newerr = abs(trP0+prev*alpha*trdPdmu-nocc) + ! enddo + else + prev = -1 + alpha = alpha/2 + ! newerr = abs(trP0+prev*alpha*trdPdmu-nocc) + ! do while(newerr > occerr) + ! alpha = alpha/2 + ! newerr = abs(trP0+prev*alpha*trdPdmu-nocc) + ! enddo endif + !newerr = abs(trP0+prev*alpha*trdPdmu-nocc) + !do while(newerr > occerr .and. abs(occerr-newerr)/occerr<0.1 ) + ! alpha = alpha/2 + ! newerr = abs(trP0+prev*alpha*trdPdmu-nocc) + !enddo +! write(*,*) 'newerr =', newerr + mu = mu + prev*alpha + muadj = 1 + ! Otherwise do Newton else if (occErr .gt. occErrLimit) then mustep = (nocc -trP0)/trdPdmu - if (abs(mustep) > 1.0) then - mustep = 0.1_dp*mustep - end if + !if (abs(mustep) > 1.0 .or. preverr < occErr) then + ! alpha = alpha/2 + ! mustep = alpha*mustep + !end if mu = mu + mustep muadj = 1 + preverr = occErr end if enddo - if (iter .ge. 50) then + if (iter .ge. maxiter) then write(*,*) 'Could not converge chemical potential in prg_impplicit_fermi_save_inverse' end if ! Adjusting the occupation sometimes causes the perturbation calculation to not converge. @@ -432,12 +457,12 @@ subroutine prg_implicit_fermi_first_order_response(H0_bml, H1_bml, P0_bml, P1_bm implicit none type(bml_matrix_t), intent(in) :: H0_bml, H1_bml, Inv_bml(nsteps) - type(bml_matrix_t), intent(inout) :: P0_bml, P1_bml + type(bml_matrix_t), intent(inout) :: P0_bml,P1_bml real(dp), intent(in) :: mu0, threshold real(dp) :: mu1 real(dp), intent(in) :: beta, nocc integer, intent(in) :: nsteps - type(bml_matrix_t) :: B_bml, C_bml, C0_bml + type(bml_matrix_t) :: B0_bml, B_bml, C_bml, C0_bml character(20) :: bml_type real(dp) :: p1_trace, dPdmu_trace, p1B_trace, mu1B, cnst integer :: N, M, i, j, k @@ -446,6 +471,7 @@ subroutine prg_implicit_fermi_first_order_response(H0_bml, H1_bml, P0_bml, P1_bm N = bml_get_N(H0_bml) M = bml_get_M(H0_bml) + call bml_zero_matrix(bml_type, bml_element_real, dp, N, M, B0_bml) call bml_zero_matrix(bml_type, bml_element_real, dp, N, M, B_bml) call bml_zero_matrix(bml_type, bml_element_real, dp, N, M, C_bml) call bml_zero_matrix(bml_type, bml_element_real, dp, N, M, C0_bml) @@ -477,11 +503,31 @@ subroutine prg_implicit_fermi_first_order_response(H0_bml, H1_bml, P0_bml, P1_bm call bml_multiply(Inv_bml(i), C_bml, P1_bml, 1.0_dp, 0.0_dp, threshold) enddo +! do i = 1, nsteps-1 + ! D = A^-1*P0 +! call bml_multiply(Inv_bml(i), B0_bml, C0_bml, 1.0_dp, 0.0_dp, threshold) +! call bml_multiply(C0_bml, B0_bml, B_bml, 1.0_dp, 0.0_dp, threshold) + ! B0 = A^-1*P0^2 +! call bml_copy(B_bml,B0_bml) + ! B = I + D -P0*D +! call bml_add(B_bml, C0_bml, -1.0_dp, 1.0_dp, threshold) +! call bml_scale_add_identity(B_bml, 1.0_dp, 1.0_dp, threshold) + ! P1 = 2D*P1(I+D-P0*D) +! call bml_multiply(C0_bml, P1_bml, C_bml, 1.0_dp, 0.0_dp, threshold) +! call bml_multiply(C_bml, B_bml, P1_bml, 2.0_dp, 0.0_dp, threshold) +! enddo +! call bml_multiply(B0_bml, P1_bml, C_bml, 2.0_dp, 0.0_dp, threshold) +! call bml_copy(P1_bml, B_bml) +! call bml_add(B_bml, C_bml, 2.0_dp, -2.0_dp, threshold) + ! Get next P1 +! call bml_multiply(B_bml, P0_bml, C_bml, 1.0_dp, 1.0_dp, threshold) +! call bml_multiply(Inv_bml(i), C_bml, P1_bml, 1.0_dp, 0.0_dp, threshold) + + ! dPdmu = beta*P0(I-P0) call bml_copy(P0_bml, B_bml) call bml_scale_add_identity(B_bml, -1.0_dp, 1.0_dp, threshold) - call bml_multiply(P0_bml, B_bml, C_bml, 1.0_dp, 0.0_dp, threshold) - call bml_scale(beta,C_bml) + call bml_multiply(P0_bml, B_bml, C_bml, beta, 0.0_dp, threshold) dPdmu_trace = bml_trace(C_bml) p1_trace = bml_trace(P1_bml) mu1 = - p1_trace/dPdmu_trace @@ -490,6 +536,7 @@ subroutine prg_implicit_fermi_first_order_response(H0_bml, H1_bml, P0_bml, P1_bm endif call bml_deallocate(B_bml) + call bml_deallocate(B0_bml) call bml_deallocate(C_bml) call bml_deallocate(C0_bml) @@ -866,9 +913,7 @@ subroutine prg_newtonschulz(a_bml, ai_bml, r_bml, tmp_bml, d_bml, I_bml, tol, th N = bml_get_N(a_bml) err = 100000.0 i = 0 - N2 = N*N - scaled_tol = tol*N - do while(err > scaled_tol) + do while(err > tol) !write(*,*) 'iter = ', i !write(*,*) 'ns error =', err call bml_copy(ai_bml, tmp_bml) @@ -880,7 +925,7 @@ subroutine prg_newtonschulz(a_bml, ai_bml, r_bml, tmp_bml, d_bml, I_bml, tol, th !write(*,*) "prev_err = ", prev_err if (10*prev_err < err) then write(*,*) 'NS did not converge, calling conjugate gradient' - call prg_conjgrad(a_bml, ai_bml, I_bml, r_bml, tmp_bml, d_bml, 0.0001_dp, threshold) + call prg_conjgrad(a_bml, ai_bml, I_bml, r_bml, tmp_bml, d_bml, 0.00001_dp, threshold) else call bml_multiply(tmp_bml, r_bml, ai_bml, 1.0_dp, 1.0_dp, threshold) endif @@ -990,7 +1035,7 @@ subroutine prg_conjgrad(A_bml, p_bml, p2_bml, tmp_bml, d_bml, w_bml, cg_tol, thr do while (r_norm_new .gt. cg_tol) - write(*,*) r_norm_new + ! write(*,*) r_norm_new k = k + 1 if (k .eq. 1) then call bml_copy(tmp_bml, d_bml) @@ -1011,7 +1056,7 @@ subroutine prg_conjgrad(A_bml, p_bml, p2_bml, tmp_bml, d_bml, w_bml, cg_tol, thr stop endif enddo - write(*,*) "Number of CG-iterations:", k + !write(*,*) "Number of CG-iterations:", k end subroutine prg_conjgrad diff --git a/src/prg_xlbokernel_mod.F90 b/src/prg_xlbokernel_mod.F90 index a19cb5b2..81496915 100644 --- a/src/prg_xlbokernel_mod.F90 +++ b/src/prg_xlbokernel_mod.F90 @@ -83,23 +83,23 @@ end subroutine Invert !! \param Nr_elem Number of elements in Hubbard list. subroutine prg_kernel_multirank_latte(KRes,KK0_bml,Res,FelTol,L,LMAX,NUMRANK,HO_bml,mu,beta,RXYZ,Box,Hubbard_U,Element_Pointer, & Nr_atoms,HDIM,Max_Nr_Neigh,Coulomb_acc,nebcoul,totnebcoul,Hinxlist, & - S_bml,Z_bml,Nocc,Inv_bml,H1_bml,X_bml,Y_bml,DO_bml,D1_bml,m_rec,threshold,Nr_elem) + S_bml,Z_bml,Nocc,Inv_bml,H1_bml,DO_bml,D1_bml,m_rec,threshold,Nr_elem) !! Res = q[n] - n !! KK0 is preconditioner !! KRes rank-L approximation of (K0*J)^(-1)*K0*(q[n]-n) with (K0*J)^(-1) as in Eq. (41) in Ref. [*] implicit none - integer, intent(in) :: Nr_atoms, HDIM, Nocc,Max_Nr_Neigh,m_rec,Nr_elem,NUMRANK + integer, intent(in) :: Nr_atoms, HDIM, Max_Nr_Neigh,m_rec,Nr_elem,NUMRANK real(dp), parameter :: ONE = 1.D0, TWO = 2.D0, ZERO = 0.D0 - real(dp), intent(in) :: Coulomb_acc, FelTol, beta, mu + real(dp), intent(in) :: Coulomb_acc, FelTol, beta, mu, Nocc real(dp), intent(in) :: RXYZ(3,Nr_atoms), Box(3,3) real(dp), intent(in) :: Res(Nr_atoms) integer, intent(in) :: Hinxlist(HDIM),Element_Pointer(Nr_atoms) real(dp), intent(in) :: Hubbard_U(Nr_elem) type(bml_matrix_t), intent(inout) :: HO_bml, S_bml, Z_bml, Inv_bml(m_rec), KK0_bml real(dp) :: K0Res(Nr_atoms) - type(bml_matrix_t),intent(inout) :: H1_bml, X_bml,Y_bml,DO_bml,D1_bml + type(bml_matrix_t),intent(inout) :: H1_bml, DO_bml,D1_bml real(dp), intent(in) :: threshold integer, intent(in) :: LMAX character(20) :: bml_type @@ -115,17 +115,19 @@ subroutine prg_kernel_multirank_latte(KRes,KK0_bml,Res,FelTol,L,LMAX,NUMRANK,HO_ real(dp) :: vi(Nr_atoms,LMAX), fi(Nr_atoms,LMAX), v(Nr_atoms) real(dp) :: dr(Nr_Atoms), IdentRes(Nr_atoms) real(dp), allocatable :: O(:,:),M(:,:),row1(:),row2(:),row_NA(:) - type(bml_matrix_t) :: KK0T_bml, K0Res_bml, Res_bml, ZT_bml, T_bml + type(bml_matrix_t) :: KK0T_bml, K0Res_bml, Res_bml, ZT_bml, T_bml, X_bml, Y_bml call timer_prg_init() bml_type = bml_get_type(HO_bml) N = bml_get_N(HO_bml) MN = bml_get_M(HO_bml) allocate(row1(HDIM)); allocate(row2(HDIM)); allocate(row_NA(Nr_atoms)) - call bml_zero_matrix('csr',bml_element_real,dp,Nr_atoms,Nr_atoms,KK0T_bml) - call bml_zero_matrix('csr',bml_element_real,dp,Nr_atoms,Nr_atoms,K0Res_bml) - call bml_zero_matrix('csr',bml_element_real,dp,Nr_atoms,Nr_atoms,Res_bml) + call bml_zero_matrix(bml_type,bml_element_real,dp,Nr_atoms,Nr_atoms,KK0T_bml) + call bml_zero_matrix(bml_type,bml_element_real,dp,Nr_atoms,Nr_atoms,K0Res_bml) + call bml_zero_matrix(bml_type,bml_element_real,dp,Nr_atoms,Nr_atoms,Res_bml) call bml_zero_matrix(bml_type,bml_element_real,dp,N,MN,ZT_bml) + call bml_zero_matrix(bml_type,bml_element_real,dp,N,MN,X_bml) + call bml_zero_matrix(bml_type,bml_element_real,dp,N,MN,Y_bml) call bml_transpose(Z_bml,ZT_bml) @@ -137,6 +139,7 @@ subroutine prg_kernel_multirank_latte(KRes,KK0_bml,Res,FelTol,L,LMAX,NUMRANK,HO_ K0Res = row_NA dr = K0Res + write(*,*) 'resnorm =', norm2(Res), 'kresnorm =', norm2(dr) I = 0 Fel = 1.D0 do while ((Fel > FelTol).AND.(I < (LMAX)).AND.(I < NUMRANK)) !! Fel = "Error" in Swedish @@ -249,6 +252,8 @@ subroutine prg_kernel_multirank_latte(KRes,KK0_bml,Res,FelTol,L,LMAX,NUMRANK,HO_ call bml_deallocate(K0Res_bml) call bml_deallocate(Res_bml) call bml_deallocate(ZT_bml) + call bml_deallocate(X_bml) + call bml_deallocate(Y_bml) call prg_timer_shutdown() end subroutine prg_kernel_multirank_latte @@ -327,8 +332,8 @@ subroutine prg_kernel_multirank(KRes,KK0_bml,Res,FelTol,L,LMAX,HO_bml,mu,beta,RX enddo !$OMP END PARALLEL DO - call Ewald_k_Space(Coulomb_Pot_k,Coulomb_Force_k,RX,RY,RZ,LBox,dq_v,Nr_atoms,Coulomb_acc,TIMERATIO,Max_Nr_Neigh) - Coulomb_Pot_dq_v = Coulomb_Pot_Real+Coulomb_Pot_k + ! call Ewald_k_Space(Coulomb_Pot_k,Coulomb_Force_k,RX,RY,RZ,LBox,dq_v,Nr_atoms,Coulomb_acc,TIMERATIO,Max_Nr_Neigh) + ! Coulomb_Pot_dq_v = Coulomb_Pot_Real+Coulomb_Pot_k row1 = 0.0_dp do J = 1,HDIM @@ -433,41 +438,43 @@ end subroutine prg_kernel_multirank !! \param Nr_elem Number of elements in Hubbard list. subroutine prg_full_kernel_latte(KK,DO_bml,mu0,RXYZ,Box,Hubbard_U, & Element_Pointer, Nr_atoms,HDIM, Max_Nr_Neigh,Coulomb_acc, & - Hinxlist,S_bml,Z_bml,Inv_bml,D1_bml,H1_bml,HO_bml,Y_bml,X_bml, & + Hinxlist,S_bml,Z_bml,Inv_bml,D1_bml,H1_bml,HO_bml, & Nocc,m_rec,threshold,beta,Nr_elem) use bml implicit none integer, parameter :: PREC = 8, dp = kind(1.0d0) - integer, intent(in) :: Nr_atoms, Nr_elem,HDIM, Nocc, Max_Nr_Neigh,m_rec + integer, intent(in) :: Nr_atoms, Nr_elem,HDIM, Max_Nr_Neigh,m_rec real(PREC), parameter :: ONE = 1.D0, TWO = 2.D0, ZERO = 0.D0 real(PREC), parameter :: kB = 8.61739d-5 ! eV/K, kB = 6.33366256e-6 Ry/K, kB = 3.166811429e-6 Ha/K - real(PREC), intent(in) :: Coulomb_acc, threshold,beta + real(PREC), intent(in) :: Coulomb_acc, threshold,beta,Nocc real(PREC) :: v(Nr_atoms) real(PREC), intent(in) :: RXYZ(3,Nr_atoms),Box(3,3) integer, intent(in) :: Hinxlist(HDIM) real(PREC), intent(in) :: Hubbard_U(Nr_elem) type(bml_matrix_t), intent(in) :: Inv_bml(m_rec) type(bml_matrix_t), intent(in) :: S_bml,Z_bml,HO_bml - type(bml_matrix_t),intent(inout) :: H1_bml,DO_bml,D1_bml,Y_bml,X_bml + type(bml_matrix_t),intent(inout) :: H1_bml,DO_bml,D1_bml real(PREC), intent(inout) :: mu0 integer, intent(in) :: Element_Pointer(Nr_atoms) real(PREC) :: Coulomb_Pot_Real(Nr_atoms), Coulomb_Pot(Nr_atoms) real(PREC) :: Coulomb_Pot_Real_I, Coulomb_Pot_k(Nr_atoms) real(PREC) :: Coulomb_Pot_Real_dq_v(Nr_atoms), Coulomb_Pot_dq_v(Nr_atoms) real(PREC) :: dq_v(Nr_atoms) - real(PREC) :: dq_dv(Nr_atoms), err,tol + real(PREC) :: dq_dv(Nr_atoms), err,tol,start,ewaldk,ewaldr,ewaldkacc,ewaldracc,response,respacc real(PREC), intent(out) :: KK(Nr_atoms,Nr_atoms) integer :: I,J,K, ITER, mm,It,N,MN real(PREC),allocatable :: row1(:),row2(:),JJ(:,:) - type(bml_matrix_t) :: ZT_bml, R_bml, JJI_bml, JJ_bml, tmp_bml + type(bml_matrix_t) :: ZT_bml, R_bml, JJI_bml, JJ_bml, tmp_bml, X_bml, Y_bml character(20) :: bml_type bml_type = bml_get_type(HO_bml) N = bml_get_N(HO_bml) MN = bml_get_M(HO_bml) call bml_zero_matrix(bml_type,bml_element_real,dp,N,MN,ZT_bml) + call bml_zero_matrix(bml_type,bml_element_real,dp,N,MN,X_bml) + call bml_zero_matrix(bml_type,bml_element_real,dp,N,MN,Y_bml) call bml_transpose(Z_bml,ZT_bml) allocate(row1(HDIM)); allocate(row2(HDIM)); allocate(JJ(Nr_atoms,Nr_atoms)) Coulomb_Pot_dq_v = ZERO @@ -476,7 +483,11 @@ subroutine prg_full_kernel_latte(KK,DO_bml,mu0,RXYZ,Box,Hubbard_U, & JJ = ZERO KK = ZERO + write(*,*) 'Beginning response calculations' + ewaldracc = 0.0; ewaldkacc = 0.0; respacc = 0.0; do J = 1,Nr_atoms + write(*,*) 'J =',J + call cpu_time(start) dq_v(J) = ONE !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(I,Coulomb_Pot_Real_I) do I = 1,Nr_atoms @@ -485,9 +496,12 @@ subroutine prg_full_kernel_latte(KK,DO_bml,mu0,RXYZ,Box,Hubbard_U, & Coulomb_Pot_Real(I) = Coulomb_Pot_Real_I enddo !$OMP END PARALLEL DO - call Ewald_k_Space_latte(Coulomb_Pot_k,RXYZ,Box,dq_v,Nr_atoms,Coulomb_acc, & - Max_Nr_Neigh) + call cpu_time(ewaldr) + ewaldracc = ewaldracc + ewaldr-start + call Ewald_k_Space_latte_single(Coulomb_Pot_k,J,RXYZ,Box,dq_v,Nr_atoms,Coulomb_acc) Coulomb_Pot_dq_v = Coulomb_Pot_Real+Coulomb_Pot_k + call cpu_time(ewaldk) + ewaldkacc = ewaldkacc + ewaldk-ewaldr call bml_deallocate(H1_bml) call bml_zero_matrix(bml_type,bml_element_real,dp,N,MN,H1_bml) @@ -512,7 +526,9 @@ subroutine prg_full_kernel_latte(KK,DO_bml,mu0,RXYZ,Box,Hubbard_U, & call bml_multiply(X_bml,Z_bml,H1_bml,1.0_dp,0.0_dp,threshold) call prg_implicit_fermi_first_order_response(HO_bml,H1_bml,DO_bml,D1_bml,Inv_bml, & - m_rec,mu0,beta,real(nocc,PREC),threshold) + m_rec,mu0,beta,Nocc,threshold) + call cpu_time(response) + respacc = respacc + response-ewaldk call bml_multiply(Z_bml,D1_bml,Y_bml,2.0_dp,0.0_dp,threshold) call bml_multiply(Y_bml,ZT_bml,D1_bml,1.0_dp,0.0_dp,threshold) @@ -540,10 +556,15 @@ subroutine prg_full_kernel_latte(KK,DO_bml,mu0,RXYZ,Box,Hubbard_U, & JJ(I,I) = JJ(I,I) - ONE enddo + write(*,*) 'ewaldracc =', ewaldracc + write(*,*) 'ewaldkacc =', ewaldkacc + write(*,*) 'implcit response time =', respacc call Invert(JJ,KK,Nr_atoms) deallocate(row1); deallocate(row2); deallocate(JJ) call bml_deallocate(ZT_bml) + call bml_deallocate(X_bml) + call bml_deallocate(Y_bml) end subroutine prg_full_kernel_latte @@ -627,8 +648,8 @@ subroutine prg_full_kernel(KK,DO_bml,mu0,RX,RY,RZ,LBox,Hubbard_U,Element_Type, & Coulomb_Pot_Real(I) = Coulomb_Pot_Real_I enddo !$OMP END PARALLEL DO - call Ewald_k_Space(Coulomb_Pot_k,Coulomb_Force_k,RX,RY,RZ,LBox,dq_v,Nr_atoms,Coulomb_acc, & - TIMERATIO,Max_Nr_Neigh) + ! call Ewald_k_Space(Coulomb_Pot_k,Coulomb_Force_k,RX,RY,RZ,LBox,dq_v,Nr_atoms,Coulomb_acc, & + ! TIMERATIO,Max_Nr_Neigh) Coulomb_Pot_dq_v = Coulomb_Pot_Real+Coulomb_Pot_k diagonal = 0.0_dp @@ -691,9 +712,9 @@ subroutine prg_kernel_matrix_multirank(KRes,KK0_bml,Res,FelTol,L,LMAX,HO_bml,mu, !! KRes rank-L approximation of (K0*J)^(-1)*K0*(q[n]-n) with (K0*J)^(-1) as in Eq. (41) in Ref. [*] implicit none - integer, intent(in) :: Nr_atoms, HDIM, Nocc,Max_Nr_Neigh,m_rec + integer, intent(in) :: Nr_atoms, HDIM, Max_Nr_Neigh,m_rec real(dp), parameter :: ONE = 1.D0, TWO = 2.D0, ZERO = 0.D0 - real(dp), intent(in) :: Coulomb_acc, TIMERATIO, FelTol, beta, mu + real(dp), intent(in) :: Coulomb_acc, TIMERATIO, FelTol, beta, mu,Nocc real(dp), intent(in) :: RX(Nr_atoms), RY(Nr_atoms), RZ(Nr_atoms), LBox(3) real(dp), intent(in) :: Res(Nr_atoms) integer, intent(in) :: H_INDEX_START(Nr_atoms), H_INDEX_END(Nr_atoms) @@ -745,8 +766,8 @@ subroutine prg_kernel_matrix_multirank(KRes,KK0_bml,Res,FelTol,L,LMAX,HO_bml,mu, enddo !$OMP END PARALLEL DO - call Ewald_k_Space(Coulomb_Pot_k,Coulomb_Force_k,RX,RY,RZ,LBox,dq_v,Nr_atoms,Coulomb_acc,TIMERATIO,Max_Nr_Neigh) - Coulomb_Pot_dq_v = Coulomb_Pot_Real+Coulomb_Pot_k + ! call Ewald_k_Space(Coulomb_Pot_k,Coulomb_Force_k,RX,RY,RZ,LBox,dq_v,Nr_atoms,Coulomb_acc,TIMERATIO,Max_Nr_Neigh) + ! Coulomb_Pot_dq_v = Coulomb_Pot_Real+Coulomb_Pot_k row1 = 0.0_dp do J = 1,HDIM @@ -771,7 +792,7 @@ subroutine prg_kernel_matrix_multirank(KRes,KK0_bml,Res,FelTol,L,LMAX,HO_bml,mu, call bml_multiply(D1_bml,Z_bml,H1_bml,1.0_dp,0.0_dp,threshold) call prg_implicit_fermi_first_order_response(HO_bml,H1_bml,DO_bml,D1_bml,Inv_bml, & - m_rec, mu, beta, real(nocc,dp), threshold) + m_rec, mu, beta, Nocc, threshold) call bml_transpose(Z_bml,X_bml) call bml_multiply(Z_bml,D1_bml,Y_bml,2.0_dp,0.0_dp,threshold) From 1256e74740bf2b2b9f2953f70c236c9becbc1826 Mon Sep 17 00:00:00 2001 From: Linnea Andersson Date: Fri, 16 Jul 2021 18:23:21 +0200 Subject: [PATCH 08/10] Fixed test --- tests/src/main.F90 | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/src/main.F90 b/tests/src/main.F90 index 87285c05..d1df2f04 100644 --- a/tests/src/main.F90 +++ b/tests/src/main.F90 @@ -231,11 +231,10 @@ program main call bml_read_matrix(ham_bml,'hamiltonian_ortho.mtx') call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,rho1_bml) - call prg_test_density_matrix(ham_bml,rho1_bml,beta,mu,nocc,1,1e-4_dp,threshold) - write(*,*) mu + mu = 0.2_dp call prg_implicit_fermi_save_inverse(inv_bml,ham_bml,rho_bml,norecs,nocc,mu,beta,1e-4_dp, threshold, 1e-5_dp, 1,occiter) - + call prg_test_density_matrix(ham_bml,rho1_bml,beta,mu,nocc,1,1e-4_dp,threshold) write(*,*) mu call bml_scale(0.5_dp,rho_bml) call bml_add(rho1_bml,rho_bml,1.0_dp,-1.0_dp,threshold) From 6304e52fbc1f45de355658e384d72acab31c580a Mon Sep 17 00:00:00 2001 From: Linnea Andersson Date: Sat, 17 Jul 2021 20:22:08 +0200 Subject: [PATCH 09/10] Fixed indentation --- src/prg_ewald_mod.F90 | 20 ++++---- src/prg_implicit_fermi_mod.F90 | 92 +++++++++++++++++----------------- src/prg_syrotation_mod.F90 | 4 +- src/prg_xlbokernel_mod.F90 | 14 +++--- tests/src/main.F90 | 14 +++--- 5 files changed, 72 insertions(+), 72 deletions(-) diff --git a/src/prg_ewald_mod.F90 b/src/prg_ewald_mod.F90 index 5539c12d..a56de96e 100644 --- a/src/prg_ewald_mod.F90 +++ b/src/prg_ewald_mod.F90 @@ -436,8 +436,8 @@ subroutine Ewald_Real_Space_Test(COULOMBV,I,RX,RY,RZ,LBox, & Ra(2) = RY(I) Ra(3) = RZ(I) -! !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(nnI,J,Rb,Rab,dR,MAGR,MAGR2,TJ,DC,Z,NUMREP_ERFC,CA) & -! !$OMP REDUCTION(+:COULOMBV) + ! !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(nnI,J,Rb,Rab,dR,MAGR,MAGR2,TJ,DC,Z,NUMREP_ERFC,CA) & + ! !$OMP REDUCTION(+:COULOMBV) do nnI = 1,nrnnlist(I) Rb(1) = nnRx(I,nnI) Rb(2) = nnRy(I,nnI) @@ -488,7 +488,7 @@ subroutine Ewald_Real_Space_Test(COULOMBV,I,RX,RY,RZ,LBox, & endif enddo -! !$OMP END PARALLEL DO + ! !$OMP END PARALLEL DO COULOMBV = KECONST*COULOMBV end subroutine Ewald_Real_Space_Test @@ -815,8 +815,8 @@ subroutine Ewald_k_Space_latte_single(COULOMBV,J,RXYZ,Box,DELTAQ,Nr_atoms,COULAC M22 = M*RECIPVECS(2,2) do N = NMIN,NMAX - K(1) = L11 - K(2) = M22 + K(1) = L11 + K(2) = M22 K(3) = N*RECIPVECS(3,3) K2 = K(1)*K(1) + K(2)*K(2) + K(3)*K(3) @@ -824,7 +824,7 @@ subroutine Ewald_k_Space_latte_single(COULOMBV,J,RXYZ,Box,DELTAQ,Nr_atoms,COULAC KEPREF = KECONST*PREFACTOR JDOT = K(1)*RXYZ(1,J) + K(2)*RXYZ(2,J) + K(3)*RXYZ(3,J) SINJDOT = sin(JDOT) - COSJDOT = cos(JDOT) + COSJDOT = cos(JDOT) do I = 1,Nr_atoms IDOT = K(1)*RXYZ(1,I) + K(2)*RXYZ(2,I) + K(3)*RXYZ(3,I) COULOMBV(I) = COULOMBV(I) + KEPREF*DELTAQ(J)*(COSJDOT*cos(IDOT)+SINJDOT*sin(IDOT)) @@ -841,9 +841,9 @@ subroutine Ewald_k_Space_latte_single(COULOMBV,J,RXYZ,Box,DELTAQ,Nr_atoms,COULAC end subroutine Ewald_k_Space_latte_single subroutine Ewald_k_Space_Test(COULOMBV,RX,RY,RZ,LBox,DELTAQ,Nr_atoms,COULACC,Max_Nr_Neigh) -! + ! implicit none -! + ! integer, parameter :: PREC = 8 real(PREC), parameter :: ONE = 1.D0, TWO = 2.D0, ZERO = 0.D0, SIX = 6.D0, THREE = 3.D0, FOUR = 4.D0 real(PREC), parameter :: FOURTYEIGHT = 48.D0, ELEVEN = 11.D0, SIXTEEN = 16.D0, EIGHT = 8.D0 @@ -864,7 +864,7 @@ subroutine Ewald_k_Space_Test(COULOMBV,RX,RY,RZ,LBox,DELTAQ,Nr_atoms,COULACC,Max real(PREC) :: PREVIR, COSSUM,SINSUM,DOT, KEPREF, COSSUM2, SINSUM2 integer :: I,J,L,M,N, ccnt, nnI, LMAX,MMAX,NMAX,NMIN,MMIN -! + ! COULVOL = LBox(1)*LBox(2)*LBox(3) SQRTX = sqrt(-log(COULACC)) @@ -946,7 +946,7 @@ subroutine Ewald_k_Space_Test(COULOMBV,RX,RY,RZ,LBox,DELTAQ,Nr_atoms,COULACC,Max SINSUM2 = SINSUM*SINSUM ! Add up energy and force contributions -! + ! KEPREF = KECONST*PREFACTOR !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(I) do I = 1,Nr_atoms diff --git a/src/prg_implicit_fermi_mod.F90 b/src/prg_implicit_fermi_mod.F90 index 37d03618..20d20f7b 100644 --- a/src/prg_implicit_fermi_mod.F90 +++ b/src/prg_implicit_fermi_mod.F90 @@ -100,7 +100,7 @@ subroutine prg_implicit_fermi_save_inverse(Inv_bml, h_bml, p_bml, nsteps, nocc, call prg_conjgrad(y_bml, ai_bml, I_bml, aux_bml, d_bml, w_bml, 0.0001_dp, threshold) do i = 1, nsteps call bml_copy(I_bml, Inv_bml(i)) - enddo + enddo else ! Otherwise use previous inverse as starting guess call bml_copy(Inv_bml(1),ai_bml) @@ -123,7 +123,7 @@ subroutine prg_implicit_fermi_save_inverse(Inv_bml, h_bml, p_bml, nsteps, nocc, call bml_scale_add_identity(y_bml, 2.0_dp, 1.0_dp, threshold) ! Find inverse ai = (2*(P2-P)+I)^-1 !call prg_conjgrad(y_bml, Inv_bml(i), I_bml, aux_bml, d_bml, w_bml, tol, threshold) - !call bml_copy(Inv_bml(i),ai_bml) + !call bml_copy(Inv_bml(i),ai_bml) call prg_newtonschulz(y_bml, Inv_bml(i), d_bml, w_bml, aux_bml, I_bml, tol, threshold) call bml_multiply(Inv_bml(i), p2_bml, p_bml, 1.0_dp, 0.0_dp, threshold) !call bml_copy(ai_bml, Inv_bml(i)) ! Save inverses for use in perturbation response calculation @@ -133,37 +133,37 @@ subroutine prg_implicit_fermi_save_inverse(Inv_bml, h_bml, p_bml, nsteps, nocc, trdPdmu = beta*(trP0 - bml_sum_squares(p_bml)) ! sum p(i,j)**2 occErr = abs(trP0 - nocc) write(*,*) 'occerr =', occErr - + ! If occupation error is too large, do bisection method if (occerr > 1.0_dp) then - ! if (newerr > occerr) then + ! if (newerr > occerr) then if (nocc-trP0 < 0.0_dp .and. prev .eq. -1) then prev = -1 - else if (nocc-trP0 > 0.0_dp .and. prev .eq. 1) then + else if (nocc-trP0 > 0.0_dp .and. prev .eq. 1) then prev = 1 else if (nocc-trP0 > 0.0_dp .and. prev .eq. -1) then prev = 1 alpha = alpha/2 - ! newerr = abs(trP0+prev*alpha*trdPdmu-nocc) - ! do while(newerr > occerr) - ! alpha = alpha/2 - ! newerr = abs(trP0+prev*alpha*trdPdmu-nocc) - ! enddo - else - prev = -1 - alpha = alpha/2 - ! newerr = abs(trP0+prev*alpha*trdPdmu-nocc) - ! do while(newerr > occerr) - ! alpha = alpha/2 - ! newerr = abs(trP0+prev*alpha*trdPdmu-nocc) - ! enddo + ! newerr = abs(trP0+prev*alpha*trdPdmu-nocc) + ! do while(newerr > occerr) + ! alpha = alpha/2 + ! newerr = abs(trP0+prev*alpha*trdPdmu-nocc) + ! enddo + else + prev = -1 + alpha = alpha/2 + ! newerr = abs(trP0+prev*alpha*trdPdmu-nocc) + ! do while(newerr > occerr) + ! alpha = alpha/2 + ! newerr = abs(trP0+prev*alpha*trdPdmu-nocc) + ! enddo endif !newerr = abs(trP0+prev*alpha*trdPdmu-nocc) !do while(newerr > occerr .and. abs(occerr-newerr)/occerr<0.1 ) ! alpha = alpha/2 ! newerr = abs(trP0+prev*alpha*trdPdmu-nocc) !enddo -! write(*,*) 'newerr =', newerr + ! write(*,*) 'newerr =', newerr mu = mu + prev*alpha muadj = 1 @@ -180,9 +180,9 @@ subroutine prg_implicit_fermi_save_inverse(Inv_bml, h_bml, p_bml, nsteps, nocc, end if enddo - if (iter .ge. maxiter) then - write(*,*) 'Could not converge chemical potential in prg_impplicit_fermi_save_inverse' - end if + if (iter .ge. maxiter) then + write(*,*) 'Could not converge chemical potential in prg_impplicit_fermi_save_inverse' + end if ! Adjusting the occupation sometimes causes the perturbation calculation to not converge. ! For now we recompute the DM one extra time if mu was adjusted. !if (muadj .eq. 1) then @@ -317,9 +317,9 @@ subroutine prg_implicit_fermi(h_bml, p_bml, nsteps, k, nocc, & trdPdmu = trdPdmu - bml_sum_squares(p_bml) ! sum p(i,j)**2 trdPdmu = beta * trdPdmu occErr = abs(trP0 - nocc) - if (occErr .gt. occErrLimit) then + if (occErr .gt. occErrLimit) then mu = mu + (nocc - trP0)/trdPdmu - end if + end if write(*,*) "mu =", mu enddo @@ -503,25 +503,25 @@ subroutine prg_implicit_fermi_first_order_response(H0_bml, H1_bml, P0_bml, P1_bm call bml_multiply(Inv_bml(i), C_bml, P1_bml, 1.0_dp, 0.0_dp, threshold) enddo -! do i = 1, nsteps-1 - ! D = A^-1*P0 -! call bml_multiply(Inv_bml(i), B0_bml, C0_bml, 1.0_dp, 0.0_dp, threshold) -! call bml_multiply(C0_bml, B0_bml, B_bml, 1.0_dp, 0.0_dp, threshold) - ! B0 = A^-1*P0^2 -! call bml_copy(B_bml,B0_bml) - ! B = I + D -P0*D -! call bml_add(B_bml, C0_bml, -1.0_dp, 1.0_dp, threshold) -! call bml_scale_add_identity(B_bml, 1.0_dp, 1.0_dp, threshold) - ! P1 = 2D*P1(I+D-P0*D) -! call bml_multiply(C0_bml, P1_bml, C_bml, 1.0_dp, 0.0_dp, threshold) -! call bml_multiply(C_bml, B_bml, P1_bml, 2.0_dp, 0.0_dp, threshold) -! enddo -! call bml_multiply(B0_bml, P1_bml, C_bml, 2.0_dp, 0.0_dp, threshold) -! call bml_copy(P1_bml, B_bml) -! call bml_add(B_bml, C_bml, 2.0_dp, -2.0_dp, threshold) - ! Get next P1 -! call bml_multiply(B_bml, P0_bml, C_bml, 1.0_dp, 1.0_dp, threshold) -! call bml_multiply(Inv_bml(i), C_bml, P1_bml, 1.0_dp, 0.0_dp, threshold) + ! do i = 1, nsteps-1 + ! D = A^-1*P0 + ! call bml_multiply(Inv_bml(i), B0_bml, C0_bml, 1.0_dp, 0.0_dp, threshold) + ! call bml_multiply(C0_bml, B0_bml, B_bml, 1.0_dp, 0.0_dp, threshold) + ! B0 = A^-1*P0^2 + ! call bml_copy(B_bml,B0_bml) + ! B = I + D -P0*D + ! call bml_add(B_bml, C0_bml, -1.0_dp, 1.0_dp, threshold) + ! call bml_scale_add_identity(B_bml, 1.0_dp, 1.0_dp, threshold) + ! P1 = 2D*P1(I+D-P0*D) + ! call bml_multiply(C0_bml, P1_bml, C_bml, 1.0_dp, 0.0_dp, threshold) + ! call bml_multiply(C_bml, B_bml, P1_bml, 2.0_dp, 0.0_dp, threshold) + ! enddo + ! call bml_multiply(B0_bml, P1_bml, C_bml, 2.0_dp, 0.0_dp, threshold) + ! call bml_copy(P1_bml, B_bml) + ! call bml_add(B_bml, C_bml, 2.0_dp, -2.0_dp, threshold) + ! Get next P1 + ! call bml_multiply(B_bml, P0_bml, C_bml, 1.0_dp, 1.0_dp, threshold) + ! call bml_multiply(Inv_bml(i), C_bml, P1_bml, 1.0_dp, 0.0_dp, threshold) ! dPdmu = beta*P0(I-P0) @@ -1035,7 +1035,7 @@ subroutine prg_conjgrad(A_bml, p_bml, p2_bml, tmp_bml, d_bml, w_bml, cg_tol, thr do while (r_norm_new .gt. cg_tol) - ! write(*,*) r_norm_new + ! write(*,*) r_norm_new k = k + 1 if (k .eq. 1) then call bml_copy(tmp_bml, d_bml) @@ -1173,8 +1173,8 @@ subroutine prg_test_density_matrix(ham_bml, p_bml, beta, mu, nocc, osteps, occEr trdPdmu = beta * trdPdmu occErr = abs(trP0 - nocc) if (occErr .gt. occErrLimit) then - mu = mu + (nocc - trP0)/trdPdmu - end if + mu = mu + (nocc - trP0)/trdPdmu + end if !write(*,*) "mu = ", mu enddo diff --git a/src/prg_syrotation_mod.F90 b/src/prg_syrotation_mod.F90 index 44d7f91e..16f28b7e 100644 --- a/src/prg_syrotation_mod.F90 +++ b/src/prg_syrotation_mod.F90 @@ -190,7 +190,7 @@ subroutine prg_rotate(rot,r,verbose) v2(2)=rot%v2(2) v2(3)=rot%v2(3) - vQ(1)=rot%vQ(1) !Rotation center + vQ(1)=rot%vQ(1) !Rotation center vQ(2)=rot%vQ(2) vQ(3)=rot%vQ(3) @@ -232,7 +232,7 @@ subroutine prg_rotate(rot,r,verbose) v2=pq2-vQ endif - vtr(1)=0.0_dp !Translation + vtr(1)=0.0_dp !Translation vtr(2)=0.0_dp vtr(3)=0.0_dp diff --git a/src/prg_xlbokernel_mod.F90 b/src/prg_xlbokernel_mod.F90 index 81496915..8b36f9fb 100644 --- a/src/prg_xlbokernel_mod.F90 +++ b/src/prg_xlbokernel_mod.F90 @@ -332,8 +332,8 @@ subroutine prg_kernel_multirank(KRes,KK0_bml,Res,FelTol,L,LMAX,HO_bml,mu,beta,RX enddo !$OMP END PARALLEL DO - ! call Ewald_k_Space(Coulomb_Pot_k,Coulomb_Force_k,RX,RY,RZ,LBox,dq_v,Nr_atoms,Coulomb_acc,TIMERATIO,Max_Nr_Neigh) - ! Coulomb_Pot_dq_v = Coulomb_Pot_Real+Coulomb_Pot_k + ! call Ewald_k_Space(Coulomb_Pot_k,Coulomb_Force_k,RX,RY,RZ,LBox,dq_v,Nr_atoms,Coulomb_acc,TIMERATIO,Max_Nr_Neigh) + ! Coulomb_Pot_dq_v = Coulomb_Pot_Real+Coulomb_Pot_k row1 = 0.0_dp do J = 1,HDIM @@ -557,7 +557,7 @@ subroutine prg_full_kernel_latte(KK,DO_bml,mu0,RXYZ,Box,Hubbard_U, & enddo write(*,*) 'ewaldracc =', ewaldracc - write(*,*) 'ewaldkacc =', ewaldkacc + write(*,*) 'ewaldkacc =', ewaldkacc write(*,*) 'implcit response time =', respacc call Invert(JJ,KK,Nr_atoms) @@ -648,8 +648,8 @@ subroutine prg_full_kernel(KK,DO_bml,mu0,RX,RY,RZ,LBox,Hubbard_U,Element_Type, & Coulomb_Pot_Real(I) = Coulomb_Pot_Real_I enddo !$OMP END PARALLEL DO - ! call Ewald_k_Space(Coulomb_Pot_k,Coulomb_Force_k,RX,RY,RZ,LBox,dq_v,Nr_atoms,Coulomb_acc, & - ! TIMERATIO,Max_Nr_Neigh) + ! call Ewald_k_Space(Coulomb_Pot_k,Coulomb_Force_k,RX,RY,RZ,LBox,dq_v,Nr_atoms,Coulomb_acc, & + ! TIMERATIO,Max_Nr_Neigh) Coulomb_Pot_dq_v = Coulomb_Pot_Real+Coulomb_Pot_k diagonal = 0.0_dp @@ -766,8 +766,8 @@ subroutine prg_kernel_matrix_multirank(KRes,KK0_bml,Res,FelTol,L,LMAX,HO_bml,mu, enddo !$OMP END PARALLEL DO - ! call Ewald_k_Space(Coulomb_Pot_k,Coulomb_Force_k,RX,RY,RZ,LBox,dq_v,Nr_atoms,Coulomb_acc,TIMERATIO,Max_Nr_Neigh) - ! Coulomb_Pot_dq_v = Coulomb_Pot_Real+Coulomb_Pot_k + ! call Ewald_k_Space(Coulomb_Pot_k,Coulomb_Force_k,RX,RY,RZ,LBox,dq_v,Nr_atoms,Coulomb_acc,TIMERATIO,Max_Nr_Neigh) + ! Coulomb_Pot_dq_v = Coulomb_Pot_Real+Coulomb_Pot_k row1 = 0.0_dp do J = 1,HDIM diff --git a/tests/src/main.F90 b/tests/src/main.F90 index 105fa49a..9e437799 100644 --- a/tests/src/main.F90 +++ b/tests/src/main.F90 @@ -32,7 +32,7 @@ program main implicit none integer :: norb, mdim, verbose - type(bml_matrix_t) :: inv_bml(10) + type(bml_matrix_t) :: inv_bml(10) type(bml_matrix_t) :: ham_bml type(bml_matrix_t) :: rho_bml, rho1_bml type(bml_matrix_t) :: rho_ortho_bml @@ -215,7 +215,7 @@ program main error stop endif - case("prg_implicit_fermi_save_inverse") + case("prg_implicit_fermi_save_inverse") mu = 0.2_dp beta = 4.0_dp !nocc,osteps,occerrlimit @@ -224,7 +224,7 @@ program main do i = 1,norecs call bml_identity_matrix(bml_type,bml_element_real,dp,norb,norb,inv_bml(i)) - enddo + enddo call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,rho_bml) call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,ham_bml) @@ -232,10 +232,10 @@ program main call bml_read_matrix(ham_bml,'hamiltonian_ortho.mtx') call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,rho1_bml) - - mu = 0.2_dp + + mu = 0.2_dp call prg_implicit_fermi_save_inverse(inv_bml,ham_bml,rho_bml,norecs,nocc,mu,beta,1e-4_dp, threshold, 1e-5_dp, 1,occiter) - call prg_test_density_matrix(ham_bml,rho1_bml,beta,mu,nocc,1,1e-4_dp,threshold) + call prg_test_density_matrix(ham_bml,rho1_bml,beta,mu,nocc,1,1e-4_dp,threshold) write(*,*) mu call bml_scale(0.5_dp,rho_bml) call bml_add(rho1_bml,rho_bml,1.0_dp,-1.0_dp,threshold) @@ -1132,7 +1132,7 @@ program main call bml_add_deprecated(-1.0_dp,rho_bml,1.0_dp,rho1_bml,0.0_dp) error_calc = bml_fnorm(rho_bml) write(*,*)error_calc - + if(error_calc.gt.error_tol)then write(*,*) "Error is too high", error_calc From faee51d0c03c4abdc3ae1584356a99f2020fbdd1 Mon Sep 17 00:00:00 2001 From: Linnea Andersson Date: Fri, 17 Sep 2021 18:44:46 +0200 Subject: [PATCH 10/10] changes --- src/prg_ewald_mod.F90 | 255 ++++++++++++++++++++++++++++++++- src/prg_implicit_fermi_mod.F90 | 48 ++----- src/prg_xlbokernel_mod.F90 | 24 ++-- 3 files changed, 284 insertions(+), 43 deletions(-) diff --git a/src/prg_ewald_mod.F90 b/src/prg_ewald_mod.F90 index a56de96e..5dbb24ea 100644 --- a/src/prg_ewald_mod.F90 +++ b/src/prg_ewald_mod.F90 @@ -16,9 +16,11 @@ module prg_ewald_mod public :: Ewald_Real_Space public :: Ewald_Real_Space_latte public :: Ewald_Real_Space_Test + public :: Ewald_Real_Space_Matrix_latte public :: Ewald_k_space_latte_single public :: Ewald_k_space_latte public :: Ewald_k_space_Test + public :: Ewald_k_space_Matrix_latte contains @@ -259,6 +261,129 @@ subroutine Ewald_Real_Space_Single(COULOMBV,FCOUL,I,RX,RY,RZ,LBox, & end subroutine Ewald_Real_Space_Single + subroutine Ewald_Real_Space_Matrix_latte(E,RXYZ,Box,U,Element_Pointer,Nr_atoms,COULACC,nebcoul,totnebcoul,HDIM,Max_Nr_Neigh,Nr_Elem) + + implicit none + + integer, parameter :: PREC = 8 + real(PREC), parameter :: ONE = 1.D0, TWO = 2.D0, ZERO = 0.D0, SIX = 6.D0, THREE = 3.D0 + real(PREC), parameter :: FOURTYEIGHT = 48.D0, ELEVEN = 11.D0, SIXTEEN = 16.D0 + real(PREC), parameter :: pi = 3.14159265358979323846264D0 + real(PREC), parameter :: SQRTPI = 1.772453850905516D0 + integer, intent(in) :: Nr_atoms, HDIM, Max_Nr_Neigh, Nr_Elem + real(PREC), intent(in) :: COULACC + real(PREC), intent(out) :: E(Nr_atoms,Nr_atoms) + real(PREC) :: TFACT, RELPERM, KECONST + real(PREC), intent(in) :: RXYZ(3,Nr_atoms), Box(3,3) + real(PREC), intent(in) :: U(Nr_elem) + real(PREC) :: COULCUT, COULCUT2 + integer, intent(in) :: Element_Pointer(Nr_atoms) + integer, intent(in) :: totnebcoul(Nr_atoms), nebcoul(4,Max_Nr_Neigh,Nr_atoms) + real(PREC) :: Ra(3), Rb(3), dR, Rab(3) + real(PREC) :: COULVOL, SQRTX, CALPHA, DC(3), MAGR, MAGR2, Z + real(PREC) :: CALPHA2, TI,TI2,TI3,TI4,TI6,SSA,SSB,SSC,SSD,SSE + real(PREC) :: NUMREP_ERFC, CA, FORCE, EXPTI, EXPTJ + real(PREC) :: TJ,TJ2,TJ3,TJ4,TJ6,TI2MTJ2A,SA,SB,SC,SD,SE,SF + real(PREC) :: TI2MTJ2, TI2MTI2, TJ2MTI2 + integer :: I,J,K, ccnt, newj, PBCI,PBCJ,PBCK + + COULVOL = Box(1,1)*Box(2,2)*Box(3,3) + SQRTX = sqrt(-log(COULACC)) + + ccnt = 0 + COULCUT = 12.D0 + CALPHA = SQRTX/COULCUT + COULCUT2 = COULCUT*COULCUT + CALPHA2 = CALPHA*CALPHA + + RELPERM = ONE + KECONST = 14.3996437701414D0*RELPERM + TFACT = 16.0D0/(5.0D0*KECONST) + + E = 0.0 + + !$OMP PARALLEL DO DEFAULT(PRIVATE) SHARED(E,U,Element_Pointer,RXYZ,totnebcoul,nebcoul,coulcut,calpha) + + do I = 1,Nr_atoms + + TI = TFACT*U(Element_Pointer(I)) + TI2 = TI*TI + TI3 = TI2*TI + TI4 = TI2*TI2 + TI6 = TI4*TI2 + + SSA = TI + SSB = TI3/48.D0 + SSC = 3.D0*TI2/16.D0 + SSD = 11.D0*TI/16.D0 + SSE = 1.D0 + + Ra(1) = RXYZ(1,I) + Ra(2) = RXYZ(2,I) + Ra(3) = RXYZ(3,I) + + do newj = 1,totnebcoul(I) + J = NEBCOUL(1, NEWJ, I) + PBCI = NEBCOUL(2, NEWJ, I) + PBCJ = NEBCOUL(3, NEWJ, I) + PBCK = NEBCOUL(4, NEWJ, I) + Rb(1) = RXYZ(1,J) + REAL(PBCI)*BOX(1,1) + REAL(PBCJ)*BOX(2,1) + & + REAL(PBCK)*BOX(3,1) + + Rb(2) = RXYZ(2,J) + REAL(PBCI)*BOX(1,2) + REAL(PBCJ)*BOX(2,2) + & + REAL(PBCK)*BOX(3,2) + + Rb(3) = RXYZ(3,J) + REAL(PBCI)*BOX(1,3) + REAL(PBCJ)*BOX(2,3) + & + REAL(PBCK)*BOX(3,3) + Rab = Rb-Ra ! OBS b - a !!! + dR = norm2(Rab) + MAGR = dR + MAGR2 = dR*dR + + if ((dR <= COULCUT).and.(dR > 1e-12)) then + + TJ = TFACT*U(Element_Pointer(J)) + DC = Rab/dR + + Z = abs(CALPHA*MAGR) + NUMREP_ERFC = erfc(Z) + + CA = NUMREP_ERFC/MAGR + E(I,J) = E(I,J) + CA + ccnt = ccnt + 1 + CA = CA + TWO*CALPHA*exp( -CALPHA2*MAGR2 )/SQRTPI + EXPTI = exp(-TI*MAGR ) + + if (Element_Pointer(I).eq.Element_Pointer(J)) then + E(I,J) = E(I,J) - EXPTI*(SSB*MAGR2 + SSC*MAGR + SSD + SSE/MAGR) + ccnt = ccnt + 1 + else + TJ2 = TJ*TJ + TJ3 = TJ2*TJ + TJ4 = TJ2*TJ2 + TJ6 = TJ4*TJ2 + EXPTJ = exp( -TJ*MAGR ) + TI2MTJ2 = TI2 - TJ2 + TJ2MTI2 = -TI2MTJ2 + SA = TI + SB = TJ4*TI/(TWO*TI2MTJ2*TI2MTJ2) + SC = (TJ6 - THREE*TJ4*TI2)/(TI2MTJ2*TI2MTJ2*TI2MTJ2) + SD = TJ + SE = TI4*TJ/(TWO * TJ2MTI2 * TJ2MTI2) + SF = (TI6 - THREE*TI4*TJ2)/(TJ2MTI2*TJ2MTI2*TJ2MTI2) + + E(I,J) = E(I,J) - ((EXPTI*(SB - (SC/MAGR)) + EXPTJ*(SE - (SF/MAGR)))) + endif + + endif + enddo + enddo + !$OMP END PARALLEL DO + + E = KECONST*E + + end subroutine Ewald_Real_Space_Matrix_latte + subroutine Ewald_Real_Space_latte(COULOMBV,I,RXYZ,Box, & DELTAQ,U,Element_Pointer,Nr_atoms,COULACC,nebcoul,totnebcoul,HDIM,Max_Nr_Neigh,Nr_Elem) @@ -615,7 +740,7 @@ subroutine Ewald_Real_Space(COULOMBV,FCOUL,I,RX,RY,RZ,LBox, & COULOMBV = KECONST*COULOMBV end subroutine Ewald_Real_Space - + subroutine Ewald_k_Space_latte(COULOMBV,RXYZ,Box,DELTAQ,Nr_atoms,COULACC,Max_Nr_Neigh) implicit none @@ -742,6 +867,134 @@ subroutine Ewald_k_Space_latte(COULOMBV,RXYZ,Box,DELTAQ,Nr_atoms,COULACC,Max_Nr_ end subroutine Ewald_k_Space_latte + subroutine Ewald_k_Space_Matrix_latte(E,RXYZ,Box,Nr_atoms,COULACC,Max_Nr_Neigh,nebcoul,totnebcoul) + + implicit none + + integer, parameter :: PREC = 8 + real(PREC), parameter :: ONE = 1.D0, TWO = 2.D0, ZERO = 0.D0, SIX = 6.D0, THREE = 3.D0, FOUR = 4.D0 + real(PREC), parameter :: FOURTYEIGHT = 48.D0, ELEVEN = 11.D0, SIXTEEN = 16.D0, EIGHT = 8.D0 + real(PREC), parameter :: pi = 3.14159265358979323846264D0 + real(PREC), parameter :: SQRTPI = 1.772453850905516D0 + integer, intent(in) :: Nr_atoms, Max_Nr_Neigh,nebcoul(4,Max_Nr_Neigh,Nr_atoms),totnebcoul(Nr_atoms) + real(PREC), intent(in) :: COULACC + real(PREC) :: KECONST, TFACT, RELPERM + real(PREC), intent(in) :: RXYZ(3,Nr_atoms), Box(3,3) + real(PREC) :: COULCUT, COULCUT2 + real(PREC), intent(out) :: E(Nr_atoms,Nr_atoms) + real(PREC) :: Ra(3), Rb(3), dR, Rab(3) + real(PREC) :: COULVOL, SQRTX, CALPHA, DC(3), MAGR, MAGR2, Z + real(PREC) :: CALPHA2, TI,TI2,TI3,TI4,TI6,SSA,SSB,SSC,SSD,SSE + real(PREC) :: CORRFACT,FOURCALPHA2, FORCE + real(PREC) :: RECIPVECS(3,3),SINLIST(Nr_atoms),COSLIST(Nr_Atoms) + real(PREC) :: K(3),L11,L12,L13,M21,M22,M23,K2,KCUTOFF,KCUTOFF2,PREFACTOR + real(PREC) :: PREVIR, COSSUM,SINSUM,DOT, KEPREF, COSSUM2, SINSUM2 + + integer :: I,J,L,M,N, newj, ccnt, nnI, LMAX,MMAX,NMAX,NMIN,MMIN + + COULVOL = Box(1,1)*Box(2,2)*Box(3,3) + SQRTX = sqrt(-log(COULACC)) + + ccnt = 0 + + COULCUT = 12.0D0 + CALPHA = SQRTX/COULCUT + + COULCUT2 = COULCUT*COULCUT + KCUTOFF = TWO*CALPHA*SQRTX + KCUTOFF2 = KCUTOFF*KCUTOFF + CALPHA2 = CALPHA*CALPHA + FOURCALPHA2 = FOUR*CALPHA2 + + RECIPVECS = ZERO + RECIPVECS(1,1) = TWO*pi/Box(1,1) + RECIPVECS(2,2) = TWO*pi/Box(2,2) + RECIPVECS(3,3) = TWO*pi/Box(3,3) + LMAX = floor(KCUTOFF / sqrt(RECIPVECS(1,1)*RECIPVECS(1,1))) + MMAX = floor(KCUTOFF / sqrt(RECIPVECS(2,2)*RECIPVECS(2,2))) + NMAX = floor(KCUTOFF / sqrt(RECIPVECS(3,3)*RECIPVECS(3,3))) + + RELPERM = 1.D0 + KECONST = 14.3996437701414D0*RELPERM + + !COULOMBV = ZERO + SINLIST = ZERO + COSLIST = ZERO + + E = 0.0 + + do L = 0,LMAX + + if (L.eq.0) then + MMIN = 0 + else + MMIN = -MMAX + endif + + L11 = L*RECIPVECS(1,1) + L12 = L*RECIPVECS(1,2) + L13 = L*RECIPVECS(1,3) + + do M = MMIN,MMAX + + NMIN = -NMAX + if ((L==0).and.(M==0)) then + NMIN = 1 + endif + + M21 = L11 + M*RECIPVECS(2,1) + M22 = L12 + M*RECIPVECS(2,2) + M23 = L13 + M*RECIPVECS(2,3) + + do N = NMIN,NMAX + K(1) = M21 + N*RECIPVECS(3,1) + K(2) = M22 + N*RECIPVECS(3,2) + K(3) = M23 + N*RECIPVECS(3,3) + K2 = K(1)*K(1) + K(2)*K(2) + K(3)*K(3) + if (K2.le.KCUTOFF2) then + PREFACTOR = EIGHT*pi*exp(-K2/(4.D0*CALPHA2))/(COULVOL*K2) + PREVIR = (2.D0/K2) + (2.D0/(4.D0*CALPHA2)); + + + ! Doing the sin and cos sums + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(I,DOT) + do I = 1,Nr_atoms + DOT = K(1)*RXYZ(1,I) + K(2)*RXYZ(2,I) + K(3)*RXYZ(3,I) + ! We re-use these in the next loop... + SINLIST(I) = sin(DOT) + COSLIST(I) = cos(DOT) + ! COSSUM = COSSUM + DELTAQ(I)*COSLIST(I) + ! SINSUM = SINSUM + DELTAQ(I)*SINLIST(I) + enddo + !$OMP END PARALLEL DO + + ! Add up energy and force contributions + + KEPREF = KECONST*PREFACTOR + CORRFACT = 2.D0*KECONST*CALPHA/SQRTPI; + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(I,J) + do I = 1,Nr_atoms + do newj = 1,totnebcoul(I) + J = NEBCOUL(1, NEWJ, I) + E(I,J) = E(I,J) + KEPREF*(COSLIST(I)*COSLIST(J)+SINLIST(I)*SINLIST(J)) + ! COULOMBV(I) = COULOMBV(I) + KEPREF*(COSLIST(I)*COSSUM + SINLIST(I)*SINSUM) + enddo + E(I,I) = E(I,I) - CORRFACT + enddo + !$OMP END PARALLEL DO + + !KEPREF = KEPREF*(COSSUM2 + SINSUM2) + endif + enddo + enddo + enddo + + ! Point self energy + !CORRFACT = 2.D0*KECONST*CALPHA/SQRTPI; + !COULOMBV = COULOMBV - CORRFACT*DELTAQ; + + end subroutine Ewald_k_Space_Matrix_latte + subroutine Ewald_k_Space_latte_single(COULOMBV,J,RXYZ,Box,DELTAQ,Nr_atoms,COULACC) implicit none diff --git a/src/prg_implicit_fermi_mod.F90 b/src/prg_implicit_fermi_mod.F90 index 20d20f7b..633ea1c8 100644 --- a/src/prg_implicit_fermi_mod.F90 +++ b/src/prg_implicit_fermi_mod.F90 @@ -80,13 +80,14 @@ subroutine prg_implicit_fermi_save_inverse(Inv_bml, h_bml, p_bml, nsteps, nocc, occErr = 10.0_dp newerr = 1000_dp preverr = 1000_dp - alpha = 8.0_dp - prev = 0 + alpha = 1.0_dp + prev = 1 iter = 0 maxiter = 30 cnst = beta/(1.0_dp*2**(nsteps+2)) if (SCF_IT .eq. 1) then + alpha = 4.0_dp ! Normalization ! P0 = 0.5*I - cnst*(H0-mu0*I) call bml_copy(h_bml, p_bml) @@ -101,9 +102,6 @@ subroutine prg_implicit_fermi_save_inverse(Inv_bml, h_bml, p_bml, nsteps, nocc, do i = 1, nsteps call bml_copy(I_bml, Inv_bml(i)) enddo - else - ! Otherwise use previous inverse as starting guess - call bml_copy(Inv_bml(1),ai_bml) end if do while ((occErr .gt. occErrLimit .or. muadj .eq. 1) .and. iter < maxiter) @@ -122,8 +120,11 @@ subroutine prg_implicit_fermi_save_inverse(Inv_bml, h_bml, p_bml, nsteps, nocc, call bml_add(y_bml, p_bml, 1.0_dp, -1.0_dp, threshold) call bml_scale_add_identity(y_bml, 2.0_dp, 1.0_dp, threshold) ! Find inverse ai = (2*(P2-P)+I)^-1 - !call prg_conjgrad(y_bml, Inv_bml(i), I_bml, aux_bml, d_bml, w_bml, tol, threshold) + !call prg_conjgrad(y_bml, Inv_bml(i), I_bml, w_bml, d_bml, aux_bml, tol, threshold) !call bml_copy(Inv_bml(i),ai_bml) + if (iter .eq. 1) then + call prg_conjgrad(y_bml, Inv_bml(i), I_bml, aux_bml, d_bml, w_bml, 0.0001_dp, threshold) + endif call prg_newtonschulz(y_bml, Inv_bml(i), d_bml, w_bml, aux_bml, I_bml, tol, threshold) call bml_multiply(Inv_bml(i), p2_bml, p_bml, 1.0_dp, 0.0_dp, threshold) !call bml_copy(ai_bml, Inv_bml(i)) ! Save inverses for use in perturbation response calculation @@ -136,7 +137,6 @@ subroutine prg_implicit_fermi_save_inverse(Inv_bml, h_bml, p_bml, nsteps, nocc, ! If occupation error is too large, do bisection method if (occerr > 1.0_dp) then - ! if (newerr > occerr) then if (nocc-trP0 < 0.0_dp .and. prev .eq. -1) then prev = -1 else if (nocc-trP0 > 0.0_dp .and. prev .eq. 1) then @@ -144,36 +144,16 @@ subroutine prg_implicit_fermi_save_inverse(Inv_bml, h_bml, p_bml, nsteps, nocc, else if (nocc-trP0 > 0.0_dp .and. prev .eq. -1) then prev = 1 alpha = alpha/2 - ! newerr = abs(trP0+prev*alpha*trdPdmu-nocc) - ! do while(newerr > occerr) - ! alpha = alpha/2 - ! newerr = abs(trP0+prev*alpha*trdPdmu-nocc) - ! enddo else prev = -1 alpha = alpha/2 - ! newerr = abs(trP0+prev*alpha*trdPdmu-nocc) - ! do while(newerr > occerr) - ! alpha = alpha/2 - ! newerr = abs(trP0+prev*alpha*trdPdmu-nocc) - ! enddo endif - !newerr = abs(trP0+prev*alpha*trdPdmu-nocc) - !do while(newerr > occerr .and. abs(occerr-newerr)/occerr<0.1 ) - ! alpha = alpha/2 - ! newerr = abs(trP0+prev*alpha*trdPdmu-nocc) - !enddo - ! write(*,*) 'newerr =', newerr mu = mu + prev*alpha muadj = 1 ! Otherwise do Newton else if (occErr .gt. occErrLimit) then mustep = (nocc -trP0)/trdPdmu - !if (abs(mustep) > 1.0 .or. preverr < occErr) then - ! alpha = alpha/2 - ! mustep = alpha*mustep - !end if mu = mu + mustep muadj = 1 preverr = occErr @@ -525,14 +505,14 @@ subroutine prg_implicit_fermi_first_order_response(H0_bml, H1_bml, P0_bml, P1_bm ! dPdmu = beta*P0(I-P0) - call bml_copy(P0_bml, B_bml) - call bml_scale_add_identity(B_bml, -1.0_dp, 1.0_dp, threshold) - call bml_multiply(P0_bml, B_bml, C_bml, beta, 0.0_dp, threshold) - dPdmu_trace = bml_trace(C_bml) - p1_trace = bml_trace(P1_bml) - mu1 = - p1_trace/dPdmu_trace + !call bml_copy(P0_bml, B_bml) + !call bml_scale_add_identity(B_bml, -1.0_dp, 1.0_dp, threshold) + !call bml_multiply(P0_bml, B_bml, C_bml, beta, 0.0_dp, threshold) + !dPdmu_trace = bml_trace(C_bml) + !p1_trace = bml_trace(P1_bml) + !mu1 = - p1_trace/dPdmu_trace if (abs(dPdmu_trace) > 1e-8) then - call bml_add(P1_bml,C_bml,1.0_dp,mu1,threshold) + !call bml_add(P1_bml,C_bml,1.0_dp,mu1,threshold) endif call bml_deallocate(B_bml) diff --git a/src/prg_xlbokernel_mod.F90 b/src/prg_xlbokernel_mod.F90 index 8b36f9fb..3a25c8ce 100644 --- a/src/prg_xlbokernel_mod.F90 +++ b/src/prg_xlbokernel_mod.F90 @@ -439,7 +439,7 @@ end subroutine prg_kernel_multirank subroutine prg_full_kernel_latte(KK,DO_bml,mu0,RXYZ,Box,Hubbard_U, & Element_Pointer, Nr_atoms,HDIM, Max_Nr_Neigh,Coulomb_acc, & Hinxlist,S_bml,Z_bml,Inv_bml,D1_bml,H1_bml,HO_bml, & - Nocc,m_rec,threshold,beta,Nr_elem) + Nocc,m_rec,threshold,beta,Nr_elem,nebcoul,totnebcoul) use bml @@ -457,7 +457,7 @@ subroutine prg_full_kernel_latte(KK,DO_bml,mu0,RXYZ,Box,Hubbard_U, & type(bml_matrix_t), intent(in) :: S_bml,Z_bml,HO_bml type(bml_matrix_t),intent(inout) :: H1_bml,DO_bml,D1_bml real(PREC), intent(inout) :: mu0 - integer, intent(in) :: Element_Pointer(Nr_atoms) + integer, intent(in) :: Element_Pointer(Nr_atoms),nebcoul(4,Max_Nr_Neigh,Nr_atoms),totnebcoul(Nr_atoms) real(PREC) :: Coulomb_Pot_Real(Nr_atoms), Coulomb_Pot(Nr_atoms) real(PREC) :: Coulomb_Pot_Real_I, Coulomb_Pot_k(Nr_atoms) real(PREC) :: Coulomb_Pot_Real_dq_v(Nr_atoms), Coulomb_Pot_dq_v(Nr_atoms) @@ -465,7 +465,7 @@ subroutine prg_full_kernel_latte(KK,DO_bml,mu0,RXYZ,Box,Hubbard_U, & real(PREC) :: dq_dv(Nr_atoms), err,tol,start,ewaldk,ewaldr,ewaldkacc,ewaldracc,response,respacc real(PREC), intent(out) :: KK(Nr_atoms,Nr_atoms) integer :: I,J,K, ITER, mm,It,N,MN - real(PREC),allocatable :: row1(:),row2(:),JJ(:,:) + real(PREC),allocatable :: row1(:),row2(:),JJ(:,:),E(:,:),EReal(:,:),EKspace(:,:) type(bml_matrix_t) :: ZT_bml, R_bml, JJI_bml, JJ_bml, tmp_bml, X_bml, Y_bml character(20) :: bml_type @@ -477,12 +477,18 @@ subroutine prg_full_kernel_latte(KK,DO_bml,mu0,RXYZ,Box,Hubbard_U, & call bml_zero_matrix(bml_type,bml_element_real,dp,N,MN,Y_bml) call bml_transpose(Z_bml,ZT_bml) allocate(row1(HDIM)); allocate(row2(HDIM)); allocate(JJ(Nr_atoms,Nr_atoms)) + allocate(EReal(Nr_atoms,Nr_atoms)); allocate(EKspace(Nr_atoms,Nr_atoms)) + allocate(E(Nr_atoms,Nr_atoms)) Coulomb_Pot_dq_v = ZERO Coulomb_Pot_k = ZERO dq_v = ZERO JJ = ZERO KK = ZERO + call Ewald_Real_Space_Matrix_latte(EReal,RXYZ,Box,Hubbard_U,Element_Pointer, & + Nr_atoms,Coulomb_acc,Nebcoul,Totnebcoul,HDIM,Max_Nr_Neigh,Nr_Elem) + call Ewald_k_Space_Matrix_latte(EKspace,RXYZ,Box,Nr_atoms,Coulomb_acc,Max_Nr_Neigh,Nebcoul,Totnebcoul) + E = EReal + EKspace write(*,*) 'Beginning response calculations' ewaldracc = 0.0; ewaldkacc = 0.0; respacc = 0.0; do J = 1,Nr_atoms @@ -491,17 +497,18 @@ subroutine prg_full_kernel_latte(KK,DO_bml,mu0,RXYZ,Box,Hubbard_U, & dq_v(J) = ONE !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(I,Coulomb_Pot_Real_I) do I = 1,Nr_atoms - call Ewald_Real_Space_Single_latte(Coulomb_Pot_Real_I,I,RXYZ,Box,Nr_elem, & - dq_v,J,Hubbard_U,Element_Pointer,Nr_atoms,Coulomb_acc,HDIM,Max_Nr_Neigh) - Coulomb_Pot_Real(I) = Coulomb_Pot_Real_I + !call Ewald_Real_Space_Single_latte(Coulomb_Pot_Real_I,I,RXYZ,Box,Nr_elem, & + ! dq_v,J,Hubbard_U,Element_Pointer,Nr_atoms,Coulomb_acc,HDIM,Max_Nr_Neigh) + !Coulomb_Pot_Real(I) = Coulomb_Pot_Real_I enddo !$OMP END PARALLEL DO call cpu_time(ewaldr) ewaldracc = ewaldracc + ewaldr-start - call Ewald_k_Space_latte_single(Coulomb_Pot_k,J,RXYZ,Box,dq_v,Nr_atoms,Coulomb_acc) - Coulomb_Pot_dq_v = Coulomb_Pot_Real+Coulomb_Pot_k + !call Ewald_k_Space_latte_single(Coulomb_Pot_k,J,RXYZ,Box,dq_v,Nr_atoms,Coulomb_acc) + !Coulomb_Pot_dq_v = Coulomb_Pot_Real+Coulomb_Pot_k call cpu_time(ewaldk) ewaldkacc = ewaldkacc + ewaldk-ewaldr + Coulomb_Pot_dq_v = matmul(E,dq_v) call bml_deallocate(H1_bml) call bml_zero_matrix(bml_type,bml_element_real,dp,N,MN,H1_bml) @@ -562,6 +569,7 @@ subroutine prg_full_kernel_latte(KK,DO_bml,mu0,RXYZ,Box,Hubbard_U, & call Invert(JJ,KK,Nr_atoms) deallocate(row1); deallocate(row2); deallocate(JJ) + deallocate(E); deallocate(EReal); deallocate(EKspace) call bml_deallocate(ZT_bml) call bml_deallocate(X_bml) call bml_deallocate(Y_bml)