wannier-developers · mostofi · Mar 29, 2017 · Mar 28, 2017 · Mar 29, 2017 · Mar 29, 2017
diff --git a/src/postw90/postw90_common.F90 b/src/postw90/postw90_common.F90
@@ -241,6 +241,7 @@ subroutine pw90common_wanint_param_dist
     call comms_bcast(num_wann,1)
     call comms_bcast(timing_level,1)
     call comms_bcast(iprint,1)
+    call comms_bcast(ws_distance_tol,1)
 !    call comms_bcast(num_atoms,1)   ! Ivo: not used in postw90, right?
 !    call comms_bcast(num_species,1) ! Ivo: not used in postw90, right?
     call comms_bcast(real_lattice(1,1),9)
@@ -371,7 +372,9 @@ subroutine pw90common_wanint_param_dist
                call io_error('Error allocating kpt_latt in postw90_param_dist')
        endif
     end if
-    call comms_bcast(fermi_energy_list(1),nfermi)
+    if (nfermi /= 0) then
+       call comms_bcast(fermi_energy_list(1),nfermi)
+    end if
     call comms_bcast(kubo_freq_list(1),kubo_nfreq)
     call comms_bcast(dos_project(1),num_dos_project)
     if(.not.effective_model) then
@@ -437,12 +440,21 @@ subroutine pw90common_wanint_data_dist
                                io_date,io_time,io_stopwatch
     use w90_parameters, only : num_wann,num_kpts,num_bands,have_disentangled,&
                                u_matrix_opt,u_matrix,m_matrix,&
-                               ndimwin,lwindow,nntot
+                               ndimwin,lwindow,nntot,wannier_centres
 
     implicit none
 
     integer :: ierr,loop_kpt,m,i,j
 
+    if (.not.on_root) then
+       ! wannier_centres is allocated in param_read, so only on root node
+       ! It is then read in param_read_chpkt
+       ! Therefore, now we need to allocate it on all nodes, and then broadcast it
+       allocate(wannier_centres(3,num_wann),stat=ierr)
+       if (ierr/=0) call io_error('Error allocating wannier_centres in pw90common_wanint_data_dist')
+    end if
+    call comms_bcast(wannier_centres(1,1),3*num_wann)
+
     ! -------------------
     ! Ivo: added 8april11
     ! -------------------

diff --git a/src/ws_distance.F90 b/src/ws_distance.F90
@@ -12,22 +12,15 @@
 ! https://github.com/wannier-developers/wannier90            !
 !------------------------------------------------------------!
 !                                                            !
-! Copyright (C) 2016 Lorenzo Paulatto                        !
+! Original implementation by Lorenzo Paulatto, with later    !
+! modifications by Marco Gibertini, Dominik Gresch           !
+! and Giovanni Pizzi                                         !
 !                                                            !
 !------------------------------------------------------------!
 
 module w90_ws_distance
-  !! This module computes the optimal Wigner-Seitz cell to use for interpolation.
-
-  !     if( allocated( wdist_ndeg ) ) then
-  !        deallocate( wdist_ndeg, stat=ierr  )
-  !        if (ierr/=0) call io_error('Error in deallocating wdist_ndeg in param_dealloc')
-  !     end if
-  !     if( allocated( irdist_ws ) ) then
-  !        deallocate( irdist_ws, stat=ierr  )
-  !        if (ierr/=0) call io_error('Error in deallocating irdist_ws in param_dealloc')
-  !     end if
-
+  !! This module computes the optimal Wigner-Seitz cell around each Wannier
+  !! function to use for interpolation.
   use w90_constants, only : dp
   use w90_parameters, only : use_ws_distance, ws_distance_tol
 
@@ -38,38 +31,49 @@ module w90_ws_distance
   public :: ws_translate_dist, clean_ws_translate, ws_write_vec
   !
   integer, public, save, allocatable :: irdist_ws(:,:,:,:,:)!(3,ndegenx,num_wann,num_wann,nrpts)
-  !! The number of unit cells to shift WF j to put its centre inside the Wigner-Seitz
-  !! of wannier function i. If several shifts are equivalent (i.e. they take the function
-  !! on the edge of the WS) they are all listed
+  !! The integer number of unit cells to shift Wannier function j to put its centre 
+  !! inside the Wigner-Seitz of wannier function i. If several shifts are 
+  !! equivalent (i.e. they take the function on the edge of the WS) they are 
+  !! all listed. First index: xyz, second index: number of degenerate shifts,
+  !! third and fourth indices: i,j; fifth index: index on the R vector.
   real(DP), public, save, allocatable :: crdist_ws(:,:,:,:,:)!(3,ndegenx,num_wann,num_wann,nrpts)
+  !! Cartesian version if irdist_ws, in angstrom
   integer, public, save, allocatable :: wdist_ndeg(:,:,:)!(num_wann,num_wann,nrpts)
-  !! The number of equivalent shifts (see above)
+  !! The number of equivalent vectors for each set of (i,j,R) (that is, loops on
+  !! the second index of irdist_ws(:,:,i,j,R) go from 1 to wdist_ndeg(i,j,R))
   !
-  ! next parameter moved to parameters, used here
-  !logical, save, public :: use_ws_distance = .false.
   logical, public, save :: done_ws_distance = .false.
+  !! Global variable to know if the properties were already calculated, and avoid
+  !! recalculating them when the [[ws_translate_dist]] function is called multiple times
 
   integer, parameter :: ndegenx = 8 
   !! max number of unit cells that can touch
   !! in a single point (i.e.  vertex of cube)
 
-  integer,parameter :: far = 3
-  !! How far shold we look? It depends on how far the WFs have been wandering
-  !! from their unit-cell, or if the cell is very slanted. 2 is almost always enough,
-  !! 3 is enough in every case I have ever met
-
 contains
 
 ! Short documentation follows, for a longer explanation see the documentation
 ! of the use_ws_distance variable in the user guide.
+!
+! Some comments:
+! 1. we are using a 3x3x3 supercell to search for 'neighbors'. We should 
+! probably make this more robust.
+! 2. This computation is done independently on all processors (when run in 
+!    parallel). I think this shouldn't do a problem as the math is fairly simple
+!    and uses data already broadcasted (integer values, and the 
+!    wannier_centres), but if there is the risk of having different 
+!    degeneracies or similar things on different MPI processors, we should 
+!    probably think to do the math on node 0, and then broadcast results.
 
   subroutine ws_translate_dist(nrpts, irvec, force_recompute)
-  !! The next three subroutines find the supercell translation (i.e. the translation
-  !! by a integer number of supercell) That minimizes the distance between two given funtions,
-  !! i and j, the first in unit cell 0, the other in unit cell R.
-  !! I.e. we put  WF j in the Wigner-Seitz of WF i.
-  !! We also look for the number of equivalent translation, meaning that j is on the edge of the WS
-  !! The results are stored in global arrays wdist_ndeg and irdist_ws
+  !! Find the supercell translation (i.e. the translation by a integer number of
+  !! supercell vectors, the supercell being defined by the mp_grid) that 
+  !! minimizes the distance between two given Wannier functions, i and j, 
+  !! the first in unit cell 0, the other in unit cell R. 
+  !! I.e., we find the translation to put WF j in the Wigner-Seitz of WF i.
+  !! We also look for the number of equivalent translation, that happen when w_j,R 
+  !! is on the edge of the WS of w_i,0. The results are stored in global 
+  !! arrays wdist_ndeg, irdist_ws, crdist_ws.
 
     use w90_parameters, only : num_wann,wannier_centres, real_lattice, &
          recip_lattice, iprint
@@ -85,8 +89,8 @@ subroutine ws_translate_dist(nrpts, irvec, force_recompute)
 
     ! <<<local variables>>>
     integer  :: iw, jw, ideg, ir, ierr
-    real(DP),allocatable :: wdist_wssc_frac(:,:,:,:), irdist_real(:,:,:,:,:)
-    real(DP) :: irvec_cart(3)
+    integer :: shifts(3,ndegenx)
+    real(DP) :: irvec_cart(3), tmp(3), tmp_frac(3), R_out(3,ndegenx)
 
     ! The subroutine does nothing if called more than once, which may
     ! not be the best thing if you invoke it while the WFs are moving
@@ -98,151 +102,157 @@ subroutine ws_translate_dist(nrpts, irvec, force_recompute)
     if(done_ws_distance) return
     done_ws_distance = .true.
 
-    if (ndegenx*num_wann*nrpts<=0) call io_error("unexpected dimensions in ws_translate_dist")
+    if (ndegenx*num_wann*nrpts<=0) then
+      call io_error("unexpected dimensions in ws_translate_dist")
+    end if
 
-   allocate(irdist_ws(3,ndegenx,num_wann,num_wann,nrpts),stat=ierr)
-   if (ierr/=0) call io_error('Error in allocating irdist_ws in ws_translate_dist')
-   allocate(crdist_ws(3,ndegenx,num_wann,num_wann,nrpts),stat=ierr)
-   if (ierr/=0) call io_error('Error in allocating crdist_ws in ws_translate_dist')
-   allocate(wdist_ndeg(num_wann,num_wann,nrpts),stat=ierr)
-   if (ierr/=0) call io_error('Error in allocating wcenter_ndeg in ws_translate_dist')
+    allocate(irdist_ws(3,ndegenx,num_wann,num_wann,nrpts),stat=ierr)
+    if (ierr/=0) call io_error('Error in allocating irdist_ws in ws_translate_dist')
+    allocate(crdist_ws(3,ndegenx,num_wann,num_wann,nrpts),stat=ierr)
+    if (ierr/=0) call io_error('Error in allocating crdist_ws in ws_translate_dist')
+    allocate(wdist_ndeg(num_wann,num_wann,nrpts),stat=ierr)
+    if (ierr/=0) call io_error('Error in allocating wcenter_ndeg in ws_translate_dist')
 
-   ALLOCATE(wdist_wssc_frac(3,num_wann,num_wann,nrpts))
-   ALLOCATE(irdist_real(3,ndegenx,num_wann,num_wann,nrpts))
-
     !translation_centre_frac = 0._dp
     wdist_ndeg   = 0
     irdist_ws   = 0
     crdist_ws = 0
-    irdist_real = 0
-    ! take to WS cell
-    !     write(stdout,'(1x,a)') 'Translated centres'
-    !     write(stdout,'(4x,a,3f10.6)') 'translation centre in fractional coordinate:',translation_centre_frac(:)
+
     do ir =1,nrpts
        do jw=1,num_wann
           do iw=1,num_wann
-             call utility_frac_to_cart(DBLE(irvec(:,ir)),irvec_cart,real_lattice)
+             call utility_frac_to_cart(REAL(irvec(:,ir),kind=dp),irvec_cart,real_lattice)
              ! function IW translated in the Wigner-Size around function JW
-             wdist_wssc_frac(:,iw,jw,ir) = R_wz_sc( -wannier_centres(:,iw)&
-                  +(irvec_cart+wannier_centres(:,jw)), (/0._dp,0._dp,0._dp/) )
-             !find its degeneracy
-             CALL R_wz_sc_equiv(wdist_wssc_frac(:,iw,jw,ir), (/0._dp,0._dp,0._dp/), &
-                  wdist_ndeg(iw,jw,ir), crdist_ws(:,:,iw,jw,ir))
-             IF(wdist_ndeg(iw,jw,ir)>ndegenx) call io_error('surprising ndeg')
+             ! and also find its degeneracy, and the integer shifts needed
+             ! to identify it
+             ! Note: the routine outputs R_out, but we don't really need it
+             ! This is kept in case in the future we might want to use it
+             ! R_out contains the actual vector between the two WFs. We 
+             ! calculate instead crdist_ws, that is the Bravais lattice vector 
+             ! between two supercell lattices, that is the only one we need
+             ! later for interpolation etc.
+             CALL R_wz_sc( -wannier_centres(:,iw)&
+                  +(irvec_cart+wannier_centres(:,jw)), (/0._dp,0._dp,0._dp/), &
+                  wdist_ndeg(iw,jw,ir), R_out, shifts)
              do ideg = 1,wdist_ndeg(iw,jw,ir)
-                crdist_ws(:,ideg,iw,jw,ir) = crdist_ws(:,ideg,iw,jw,ir)&
-                     +wannier_centres(:,iw)-wannier_centres(:,jw)
-                !
-                call utility_cart_to_frac(crdist_ws(:,ideg,iw,jw,ir),&
-                     irdist_real(:,ideg,iw,jw,ir),recip_lattice)
+                irdist_ws(:,ideg,iw,jw,ir) = irvec(:,ir) + shifts(:,ideg)
+                tmp_frac = REAL(irdist_ws(:,ideg,iw,jw,ir),kind=dp)
+                CALL utility_frac_to_cart(tmp_frac,tmp,real_lattice)
+                crdist_ws(:,ideg,iw,jw,ir) = tmp
              enddo
           enddo
        enddo
     enddo
-    ! apply translation
-    irdist_ws = NINT(irdist_real)
-    !
-    IF(iprint>3)then
-       write(stdout,'(1x,a78)') repeat('-',78)
-       do ir=1,nrpts
-          write(stdout,'(i5)') ir
-          do iw=1,num_wann
-             write(stdout,'("deg:",100i2)') wdist_ndeg(:,iw,ir)
-          enddo
-       enddo
-       write(stdout,'(1x,a78)') repeat('-',78)
-    endif
-    ! sanity check for isdist_ws
-    IF(ANY(ABS(DBLE(irdist_ws)-irdist_real)>0.04)) THEN
-         call io_error('wrong irdist_ws')
-    ENDIF
-    !
-    DEALLOCATE(wdist_wssc_frac, irdist_real)
-    !
-    return
   end subroutine ws_translate_dist
 
-  function R_wz_sc(R_in, R0) result (R_bz)
-    !! puts R_in in the Wigner-Seitz cell centered around R0
+  subroutine R_wz_sc(R_in, R0, ndeg, R_out, shifts)
+    !! Put R_in in the Wigner-Seitz cell centered around R0, 
+    !! and find all equivalent vectors to this (i.e., with same distance).
+    !! Return their coordinates and the degeneracy, as well as the integer
+    !! shifts needed to get the vector (these are always multiples of
+    !! the mp_grid, i.e. they are supercell displacements in the large supercell)
     use w90_parameters, only : real_lattice,recip_lattice, mp_grid
     use w90_utility,    only : utility_cart_to_frac,utility_frac_to_cart
-    use w90_io,         only : stdout
+    use w90_io,         only : stdout, io_error
     implicit none
-    real(DP),intent(in) :: R_in(3), R0(3)
-    real(DP) :: R(3), R_bz(3), R_f(3), R_in_f(3), mod2_R_bz
-    integer :: i,j,k
+    real(DP),intent(in) :: R_in(3)
+    real(DP),intent(in) :: R0(3)
+    integer,intent(out) :: ndeg
+    real(DP),intent(out) :: R_out(3,ndegenx)
+    integer,intent(out) :: shifts(3,ndegenx)
+
+    real(DP) :: R(3), R_f(3), R_in_f(3), R_bz(3), mod2_R_bz
+    integer :: i,j,k    
+    integer,parameter :: far = 3
+    !! How far shold we look? It depends on how far the WFs have been wandering
+    !! from their unit-cell, or if the cell is very slanted. 2 is almost always 
+    !! enough, 3 is enough in every case I have ever met
 
+    ! init
+    ndeg=0
+    R_out = 0._dp
+    shifts = 0
     R_bz = R_in
     mod2_R_bz = SUM((R_bz-R0)**2)
     !
     ! take R_bz to cryst(frac) coord for translating
-    call utility_cart_to_frac(R_in,R_in_f,recip_lattice)
+    call utility_cart_to_frac(R_bz,R_in_f,recip_lattice)
 
+    ! In this first loop, I just look for the shortest vector that I obtain
+    ! by trying to displace the second Wannier function by all 
+    ! 'large-supercell' vectors (in a far x far x far large-supercell grid).
     do i = -far, far
        do j = -far, far
           do k = -far, far
 
-             R_f = R_in_f + REAL( (/i*mp_grid(1),j*mp_grid(2),k*mp_grid(3)/), kind=DP)
+             R_f = R_in_f + REAL( (/i*mp_grid(1),j*mp_grid(2),k*mp_grid(3)/), &
+                                  kind=DP)
              call utility_frac_to_cart(R_f,R,real_lattice)
 
              if(SUM((R-R0)**2)<mod2_R_bz) then
                 R_bz = R
                 mod2_R_bz = SUM((R_bz-R0)**2)
+                ! I start to set a first shift that is applied to get R_bz.
+                ! Note: I reset these every time I find a smaller vector.
+                !
+                ! At this stage, this is the same for all potentially degenerate
+                ! points (hence the use of : in shifts(1,:), for instance)
+                ! In the second loop below, this shift will be added to the 
+                ! additional shift that differs for each degenerate but 
+                ! equivalent point
+                shifts(1,:) = i*mp_grid(1)
+                shifts(2,:) = j*mp_grid(2)
+                shifts(3,:) = k*mp_grid(3)
              endif
-
           enddo
        enddo
     enddo
-  end function R_wz_sc
-  !
-
-  subroutine R_wz_sc_equiv(R_in, R0, ndeg, R_out)
-    !! Find the list list of R_out that differ from R_in by a lattice vector
-    !! and are equally distant from R0 (i.e. that are on the edges of the WS
-    !! cell centered on R0)
-    use w90_parameters, only : real_lattice,recip_lattice, mp_grid
-    use w90_utility,    only : utility_cart_to_frac,utility_frac_to_cart
-    use w90_io,         only : stdout
-    implicit none
-    real(DP),intent(in)  :: R_in(3), R0(3)
-    real(DP),intent(out) :: R_out(3,ndegenx)
-    integer,intent(out)  :: ndeg
 
-    real(DP) :: R(3), R_f(3), R_in_f(3), mod2_R_bz
-    integer :: i,j,k
-    integer,parameter :: far = 3
+    ! Now, second loop to find the list list of R_out that differ from R_in 
+    ! by a large-supercell lattice vector and are equally distant from R0 
+    ! (i.e. that are on the edges of the WS cell centered on R0)
 
-    ! init
-    ndeg=0
-    R_out = 0._dp
-
-    mod2_R_bz = SUM((R_in-R0)**2)
+    ! I start from the last R_bz found
+    mod2_R_bz = SUM((R_bz-R0)**2)
     ! check if R0 and R_in are the same vector
     if(mod2_R_bz<ws_distance_tol**2)then
        ndeg=1
+       R_out(:,1) = R0
+       ! I can safely return as 'shifts' is already set
        return
     endif
     !
     ! take R_bz to cryst(frac) coord for translating
-    call utility_cart_to_frac(R_in,R_in_f,recip_lattice)
+    call utility_cart_to_frac(R_bz,R_in_f,recip_lattice)
 
     do i = -far, far
        do j = -far, far
           do k = -far, far
 
-             R_f = R_in_f + REAL( (/i*mp_grid(1),j*mp_grid(2),k*mp_grid(3)/), kind=DP)
+             R_f = R_in_f + REAL( (/i*mp_grid(1),j*mp_grid(2),k*mp_grid(3)/), &
+                                  kind=DP)
              call utility_frac_to_cart(R_f,R,real_lattice)
 
-             if(  ABS(SQRT(SUM((R-R0)**2))-SQRT(mod2_R_bz))<ws_distance_tol) then
+             if (ABS(SQRT(SUM((R-R0)**2))-SQRT(mod2_R_bz))<ws_distance_tol) then
                 ndeg=ndeg+1
+                IF(ndeg>ndegenx) then
+                  call io_error("surprising ndeg, I wouldn't expect a degeneracy larger than 8...")
+                END IF
                 R_out(:,ndeg) = R
+                ! I return/update also the shifts. Note that I have to sum these
+                ! to the previous value since in this second loop I am using 
+                ! R_bz (from the first loop) as the 'central' reference point, 
+                ! that is already shifted by shift(:,ndeg)
+                shifts(1,ndeg) = shifts(1,ndeg) + i*mp_grid(1)
+                shifts(2,ndeg) = shifts(2,ndeg) + j*mp_grid(2)
+                shifts(3,ndeg) = shifts(3,ndeg) + k*mp_grid(3)
              endif
 
           enddo
        enddo
     enddo
     !====================================================!
-  end subroutine R_wz_sc_equiv
+  end subroutine R_wz_sc
   !====================================================!
 
   !====================================================!

diff --git a/test-suite/userconfig.tmp b/test-suite/userconfig.tmp
@@ -60,7 +60,7 @@ inputs_args = ('*.win', '')
 run_cmd_template = tc.program tc.args tc.input tc.output tc.error 
 tolerance = ( (1.0e-3, 5.0e-3, 'bandenergy'),
 	      (1.0e-6, 1.0e-6, 'bandkpt'),
-	      (1.0e-3, 1.0e-3, 'bandderiv'),
+	      (1.0e-2, 1.0e-2, 'bandderiv'),
               (1.0e-6, 1.0e-6, 'bandidx'))
 
 skip_program = grep