diff --git a/configure b/configure index 7be7959ee9..efc5015e76 100755 --- a/configure +++ b/configure @@ -1,6 +1,6 @@ #! /bin/sh # Guess values for system-dependent variables and create Makefiles. -# Generated by GNU Autoconf 2.71 for Yambo 5.2.0 r.23155 h.849241804d. +# Generated by GNU Autoconf 2.71 for Yambo 5.2.0 r.23180 h.e1699b65ed. # # Report bugs to . # @@ -610,8 +610,8 @@ MAKEFLAGS= # Identity of this package. PACKAGE_NAME='Yambo' PACKAGE_TARNAME='yambo' -PACKAGE_VERSION='5.2.0 r.23155 h.849241804d' -PACKAGE_STRING='Yambo 5.2.0 r.23155 h.849241804d' +PACKAGE_VERSION='5.2.0 r.23180 h.e1699b65ed' +PACKAGE_STRING='Yambo 5.2.0 r.23180 h.e1699b65ed' PACKAGE_BUGREPORT='yambo@yambo-code.org' PACKAGE_URL='' @@ -772,8 +772,6 @@ def_scalapack enable_scalapack SCALAPACK_LIBS BLACS_LIBS -with_petsc_branch -with_slepc_branch internal_slepc internal_petsc compile_slepc @@ -990,8 +988,6 @@ with_petsc_incs with_petsc_path with_petsc_libdir with_petsc_includedir -with_slepc_branch -with_petsc_branch enable_par_linalg with_blacs_libs with_scalapack_libs @@ -1604,7 +1600,7 @@ if test "$ac_init_help" = "long"; then # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. cat <<_ACEOF -\`configure' configures Yambo 5.2.0 r.23155 h.849241804d to adapt to many kinds of systems. +\`configure' configures Yambo 5.2.0 r.23180 h.e1699b65ed to adapt to many kinds of systems. Usage: $0 [OPTION]... [VAR=VALUE]... @@ -1670,7 +1666,7 @@ fi if test -n "$ac_init_help"; then case $ac_init_help in - short | recursive ) echo "Configuration of Yambo 5.2.0 r.23155 h.849241804d:";; + short | recursive ) echo "Configuration of Yambo 5.2.0 r.23180 h.e1699b65ed:";; esac cat <<\_ACEOF @@ -1755,8 +1751,6 @@ Optional Packages: --with-petsc-libdir= Path to the Petsc lib directory --with-petsc-includedir= Path to the Petsc include directory - --with-slepc-branch= Use the of the slepc repository. - --with-petsc-branch= Use the of the petsc repository. --with-blacs-libs=(libs|mkl) Use BLACS libraries or setup MKL --with-scalapack-libs=(libs|mkl) Use SCALAPACK libraries or setup MKL @@ -1882,7 +1876,7 @@ fi test -n "$ac_init_help" && exit $ac_status if $ac_init_version; then cat <<\_ACEOF -Yambo configure 5.2.0 r.23155 h.849241804d +Yambo configure 5.2.0 r.23180 h.e1699b65ed generated by GNU Autoconf 2.71 Copyright (C) 2021 Free Software Foundation, Inc. @@ -2511,7 +2505,7 @@ cat >config.log <<_ACEOF This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. -It was created by Yambo $as_me 5.2.0 r.23155 h.849241804d, which was +It was created by Yambo $as_me 5.2.0 r.23180 h.e1699b65ed, which was generated by GNU Autoconf 2.71. Invocation command line was $ $0$ac_configure_args_raw @@ -3269,8 +3263,8 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu SVERSION="5" SSUBVERSION="2" SPATCHLEVEL="0" -SREVISION="23155" -SHASH="849241804d" +SREVISION="23180" +SHASH="e1699b65ed" @@ -12743,24 +12737,6 @@ then : fi - -# Check whether --with-slepc-branch was given. -if test ${with_slepc_branch+y} -then : - withval=$with_slepc_branch; -else $as_nop - with_slepc_branch=none -fi - - -# Check whether --with-petsc-branch was given. -if test ${with_petsc_branch+y} -then : - withval=$with_petsc_branch; -else $as_nop - with_petsc_branch=none -fi - # def_slepc="" petsc="no" @@ -13153,8 +13129,6 @@ fi - - # # ============================================================================ @@ -15436,7 +15410,7 @@ if test x"$enable_nvtx" = "x"; then enable_nvtx="no" ; fi # def_cuda="" CUDA_FLAGS="" -CUDA_LIBS="-cudalib=cufft,cublas,cusolver" +CUDA_LIBS="-Mcudalib=cufft,cublas,cusolver" # Available cc options: # cc20 Compile for compute capability 2.0 @@ -15458,12 +15432,12 @@ CUDA_LIBS="-cudalib=cufft,cublas,cusolver" printf %s "checking for CUDA support... " >&6; } if test x"$enable_cuda" = "xyes" ; then def_cuda="-D_CUDA" - CUDA_FLAGS="-cuda -gpu=cuda9.0,cc70,nollvm $CUDA_LIBS" + CUDA_FLAGS="-Mcuda=cuda9.0,cc70,nollvm $CUDA_LIBS" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $CUDA_FLAGS" >&5 printf "%s\n" "$CUDA_FLAGS" >&6; } elif ! test x"$enable_cuda" = "x" ; then def_cuda="-D_CUDA" - CUDA_FLAGS="-cuda -gpu=$enable_cuda $CUDA_LIBS" + CUDA_FLAGS="-Mcuda=$enable_cuda $CUDA_LIBS" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $CUDA_FLAGS" >&5 printf "%s\n" "$CUDA_FLAGS" >&6; } fi @@ -15607,7 +15581,6 @@ PETSC_check="-" if test "$internal_petsc" = "yes" ; then if test "$compile_petsc" = "yes" ; then PETSC_check="C"; fi if test "$compile_petsc" = "no" ; then PETSC_check="I"; fi - if ! test "$with_petsc_branch" = "none"; then PETSC_LIBS="$PETSC_LIBS (git branch $with_petsc_branch)"; fi elif test "$enable_petsc" = "yes" ; then PETSC_check="E" fi @@ -15616,7 +15589,6 @@ SLEPC_check="-" if test "$internal_slepc" = "yes" ; then if test "$compile_slepc" = "yes" ; then SLEPC_check="C"; fi if test "$compile_slepc" = "no" ; then SLEPC_check="I"; fi - if ! test "$with_slepc_branch" = "none"; then SLEPC_LIBS="$SLEPC_LIBS (git branch $with_slepc_branch)"; fi elif test "$enable_slepc" = "yes" ; then SLEPC_check="E" fi @@ -16606,7 +16578,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 # report actual input values of CONFIG_FILES etc. instead of their # values after options handling. ac_log=" -This file was extended by Yambo $as_me 5.2.0 r.23155 h.849241804d, which was +This file was extended by Yambo $as_me 5.2.0 r.23180 h.e1699b65ed, which was generated by GNU Autoconf 2.71. Invocation command line was CONFIG_FILES = $CONFIG_FILES @@ -16670,7 +16642,7 @@ ac_cs_config_escaped=`printf "%s\n" "$ac_cs_config" | sed "s/^ //; s/'/'\\\\\\\\ cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 ac_cs_config='$ac_cs_config_escaped' ac_cs_version="\\ -Yambo config.status 5.2.0 r.23155 h.849241804d +Yambo config.status 5.2.0 r.23180 h.e1699b65ed configured by $0, generated by GNU Autoconf 2.71, with options \\"\$ac_cs_config\\" diff --git a/driver/yambo.F b/driver/yambo.F index 46740be97f..f49585ef41 100644 --- a/driver/yambo.F +++ b/driver/yambo.F @@ -242,10 +242,10 @@ integer function yambo(np,pid,lnstr,iinf,iind,iod,icd,ijs,instr,inf,ind,od,com_d ! #if defined _NL ! - ! RT + ! NL !==== ! - if (l_nl_optics) call NL_driver(en,X(:2),k,q,Dip) + if (l_nl_optics) call NL_driver(en,X(:2),Xw(:2),k,q,Dip) if (l_nl_optics) call mem_manager_report ! #endif diff --git a/include/version/version.m4 b/include/version/version.m4 index 59a11fbbbf..d464301e5f 100644 --- a/include/version/version.m4 +++ b/include/version/version.m4 @@ -1,9 +1,9 @@ -AC_INIT(Yambo, 5.2.0 r.23155 h.849241804d, yambo@yambo-code.org) +AC_INIT(Yambo, 5.2.0 r.23180 h.e1699b65ed, yambo@yambo-code.org) SVERSION="5" SSUBVERSION="2" SPATCHLEVEL="0" -SREVISION="23155" -SHASH="849241804d" +SREVISION="23180" +SHASH="e1699b65ed" AC_SUBST(SVERSION) AC_SUBST(SSUBVERSION) AC_SUBST(SPATCHLEVEL) diff --git a/lib/archive/package.list b/lib/archive/package.list index 8ed6c766f5..6cd99e58fe 100644 --- a/lib/archive/package.list +++ b/lib/archive/package.list @@ -34,8 +34,8 @@ version_libxc=5.2.3 version_lapack=3.12.0 version_blacs=missing version_scalapack=2.2.1 -version_slepc=3.20.2 -version_petsc=3.20.5 +version_slepc=3.21.0 +version_petsc=3.21.0 version_fftw=3.3.10 version_fftqe=missing version_yaml=0.2.2 diff --git a/src/Yio/io_control.F b/src/Yio/io_control.F index 47b8bd5b60..f9875066ed 100644 --- a/src/Yio/io_control.F +++ b/src/Yio/io_control.F @@ -24,6 +24,7 @@ subroutine io_control(ACTION,MODE,COM,SEC,ID,COMM,DO_IT) & RD,WR,RD_CL,WR_CL,OP_APP,OP_APP_CL,OP_WR,OP_RD,OP_WR_CL,OP_RD_CL,& & io_reset,io_PAR_comm,io_PAR_cpu,IO_read,IO_write,& & write_is_on + use stderr, ONLY:intc ! implicit none ! @@ -41,14 +42,14 @@ subroutine io_control(ACTION,MODE,COM,SEC,ID,COMM,DO_IT) if ( .not. any( (/ ACTION==RD_CL, ACTION==OP_RD_CL, ACTION==OP_WR_CL, & & ACTION==OP_APP_CL, ACTION==OP_RD, ACTION==OP_APP, & & ACTION==OP_WR, ACTION==RD, ACTION==WR,ACTION==WR_CL/) ) ) & -& call error("io_control called with a non proper action") +& call error("io_control called with a non proper action :"//intc(ACTION)) ! ! Assign a new ID if the unit is not already open ! define_ID_WR=any((/ACTION==OP_WR_CL, ACTION==OP_WR, ACTION==OP_APP_CL, ACTION==OP_APP/)) define_ID_RD=any((/ACTION==OP_RD_CL, ACTION==OP_RD/)) ! - ! Except for the cases where the ID is defined, io_control is a trivial subrtouine + ! Except for the cases where the ID is defined, io_control is a trivial subrtouine (this is a joke) ! if (.not. (define_ID_RD .or. define_ID_WR) ) then call set_action_mode_com_and_sec() diff --git a/src/allocations/X_ALLOC_elemental.F b/src/allocations/X_ALLOC_elemental.F index 51652e624f..d54301c2e7 100644 --- a/src/allocations/X_ALLOC_elemental.F +++ b/src/allocations/X_ALLOC_elemental.F @@ -8,7 +8,8 @@ subroutine X_ALLOC_elemental(WHAT,DIM) ! use pars, ONLY:rZERO,cZERO - use X_m, ONLY:Resp_ii,Resp_ij,Epsilon_ii,Joint_DOS,BS_E_sorted,Epsilon_ij,X_magnons,X_dichroism,& + use X_m, ONLY:Resp_ii,Resp_ij,Epsilon_ii,Joint_DOS,BS_E_sorted,BS_E_sorted_indx, & +& Epsilon_ij,X_magnons,X_dichroism, & & X_fxc,EEL,Alpha,X_mat,X_drude_term,X_RLcomp_ig use PHOTOLUM, ONLY:PL #ifdef _CUDA @@ -58,6 +59,8 @@ subroutine X_ALLOC_elemental(WHAT,DIM) case('BS_E_SORTED') YAMBO_ALLOC(BS_E_sorted,(DIM(1),DIM(2),DIM(3))) BS_E_sorted=cZERO + YAMBO_ALLOC(BS_E_sorted_indx,(DIM(1),2*DIM(2),5)) + BS_E_sorted_indx=0 case('FXC') YAMBO_ALLOC(X_fxc,(DIM(1))) X_fxc=cZERO @@ -103,6 +106,7 @@ subroutine X_ALLOC_elemental(WHAT,DIM) endif if (STRING_match(WHAT,"BS_E_SORTED").or.STRING_match(WHAT,"ALL")) then YAMBO_FREE(BS_E_sorted) + YAMBO_FREE(BS_E_sorted_indx) endif if (STRING_match(WHAT,"FXC").or.STRING_match(WHAT,"ALL")) then YAMBO_FREE(X_fxc) diff --git a/src/bse/.objects b/src/bse/.objects index a054e18bf0..a2f9ee2ab2 100644 --- a/src/bse/.objects +++ b/src/bse/.objects @@ -10,15 +10,15 @@ SLK_objects = K_inversion_do_it_SLK.o #endif objs = K_blocks.o K_driver_init.o K_dipoles.o K_IP.o K_IP_sort.o \ K_Transitions_setup.o K_Transition_check.o \ - K_driver.o \ + K_driver.o BSE_utilities.o \ K_WF_phases.o \ - K.o $(PARIO_objects) K_correlation_collisions_dir.o K_correlation_collisions_std.o K_exchange_collisions.o \ + K_kernel.o $(PARIO_objects) K_correlation_collisions_dir.o K_correlation_collisions_std.o K_exchange_collisions.o \ K_correlation_kernel_dir.o K_correlation_kernel_std.o K_exchange_kernel.o \ K_restart.o K_solvers.o K_Haydock.o K_Haydock_response.o K_Haydock_gather.o K_Haydock_scatter.o K_screened_interaction.o \ K_inversion_do_it_full.o EPS_via_perturbative_inversion.o \ K_inversion_driver.o K_diagonal.o K_inversion_Lo.o K_inversion_restart.o K_inversion_engine.o \ - K_diago_driver.o K_diago_non_hermitian_residuals.o K_diago_hermitian_residuals.o \ + K_diago_driver.o K_diago_left_residuals.o K_diago_right_residuals.o K_diago_overlap_matrix.o \ K_diago_perturbative.o K_diago_response_functions.o K_eps_interpolate.o \ K_observables.o K_multiply_by_V.o K_multiply_by_V_transpose.o K_dot_product.o K_components_folded_in_serial_arrays.o \ - K_stored_in_a_big_matrix.o K_diago_kerr_residual.o K_diago_magnons_residual.o \ + K_stored_in_a_big_matrix.o \ PL_diago_residual.o PL_via_perturbative_inversion.o $(SLEPC_objects) $(SLK_objects) diff --git a/src/bse/BSE_utilities.F b/src/bse/BSE_utilities.F new file mode 100644 index 0000000000..f38dbbd2c0 --- /dev/null +++ b/src/bse/BSE_utilities.F @@ -0,0 +1,120 @@ +! +! License-Identifier: GPL +! +! Copyright (C) 2023 The Yambo Team +! +! Authors (see AUTHORS file for details): AM MG DS +! +subroutine BS_exchange_oscillators_alloc(iG) + use BS, ONLY:BS_T_grp,BS_n_g_exch +#include + integer, intent(in) :: iG + if (BS_T_grp(iG)%size==0) return + YAMBO_ALLOC(BS_T_grp(iG)%O_x,(BS_n_g_exch,BS_T_grp(iG)%size)) +#ifdef _CUDA + allocate(BS_T_grp(iG)%O_x_d,mold=BS_T_grp(iG)%O_x) +#endif +end subroutine BS_exchange_oscillators_alloc +! +subroutine BS_correlation_oscillators_alloc(iB) + use BS, ONLY:BS_blk,O_ng +#include + integer, intent(in) :: iB + if (BS_blk(iB)%N_oscillators==0) return + YAMBO_ALLOC(BS_blk(iB)%O_c,(O_ng,BS_blk(iB)%N_oscillators)) +#ifdef _CUDA + allocate(BS_blk(iB)%O_c_d,mold=BS_blk(iB)%O_c) +#endif +end subroutine BS_correlation_oscillators_alloc +! +subroutine BS_exchange_oscillators_free(iG_ref) + use BS, ONLY:BS_T_grp +#include + integer, intent(in) :: iG_ref + integer :: iG + do iG=iG_ref,1,-1 + YAMBO_FREE(BS_T_grp(iG)%O_x) +#ifdef _CUDA + if (allocated(BS_T_grp(iG)%O_x_d)) deallocate(BS_T_grp(iG)%O_x_d) +#endif + enddo +end subroutine BS_exchange_oscillators_free +! +subroutine BS_correlation_oscillators_free(iB_ref,l_std_alg) + use BS, ONLY:BS_T_grp,l_BSE_minimize_memory,BS_blk,n_BS_blks +#include + integer, intent(in) :: iB_ref + logical, intent(in) :: l_std_alg + integer :: iB,ik_loop,ip_loop,ik_now,ip_now + ik_now=BS_blk(iB_ref)%ik + ip_now=BS_blk(iB_ref)%ip + if(iB_ref==n_BS_blks) then + ik_now=0 + ip_now=0 + endif + do iB=iB_ref,1,-1 + ik_loop=BS_blk(iB)%ik + ip_loop=BS_blk(iB)%ip + if ( ik_now==ik_loop .and. ip_now==ip_loop .and. & + & .not.(l_BSE_minimize_memory.or.l_std_alg)) cycle + if (.not.allocated(BS_blk(iB)%O_c)) exit + YAMBO_FREE(BS_blk(iB)%O_c) + YAMBO_FREE(BS_blk(iB)%O_table) + YAMBO_FREE(BS_blk(iB)%kp_table) +#ifdef _CUDA + if (allocated(BS_blk(iB)%O_c_d)) deallocate(BS_blk(iB)%O_c_d) +#endif + enddo +end subroutine BS_correlation_oscillators_free +! +subroutine TDDFT_oscillators_alloc_L(iGL) + use electrons, ONLY:n_spin,n_spinor + use BS, ONLY:BS_T_grp,BS_n_g_fxc +#include + integer, intent(in) :: iGL + if (BS_T_grp(iGL)%size>0) then + YAMBO_ALLOC(BS_T_grp(iGL)%O_tddft_L,(BS_n_g_fxc,BS_T_grp(iGL)%size,n_spinor,n_spinor)) +#ifdef _CUDA + allocate(BS_T_grp(iGL)%O_tddft_L_d,mold=BS_T_grp(iGL)%O_tddft_L) +#endif + endif +end subroutine TDDFT_oscillators_alloc_L +! +subroutine TDDFT_oscillators_alloc_R(iGR) + use electrons, ONLY:n_spin,n_spinor + use BS, ONLY:BS_T_grp,BS_n_g_fxc +#include + integer, intent(in) :: iGR + if (BS_T_grp(iGR)%size>0) then + YAMBO_ALLOC(BS_T_grp(iGR)%O_tddft_R,(BS_n_g_fxc,BS_T_grp(iGR)%size,n_spin,n_spin)) +#ifdef _CUDA + allocate(BS_T_grp(iGR)%O_tddft_R_d,mold=BS_T_grp(iGR)%O_tddft_R) +#endif + endif +end subroutine TDDFT_oscillators_alloc_R +! +subroutine TDDFT_oscillators_free_L(iG_ref) + use BS, ONLY:BS_T_grp +#include + integer, intent(in) :: iG_ref + integer :: iGL + do iGL=iG_ref,1,-1 + YAMBO_FREE(BS_T_grp(iGL)%O_tddft_L) +#ifdef _CUDA + if (allocated(BS_T_grp(iGL)%O_tddft_L_d)) deallocate(BS_T_grp(iGL)%O_tddft_L_d) +#endif + enddo +end subroutine TDDFT_oscillators_free_L +! +subroutine TDDFT_oscillators_free_R(iG_ref) + use BS, ONLY:BS_T_grp +#include + integer, intent(in) :: iG_ref + integer :: iGR + do iGR=iG_ref,1,-1 + YAMBO_FREE(BS_T_grp(iGR)%O_tddft_R) +#ifdef _CUDA + if (allocated(BS_T_grp(iGR)%O_tddft_R_d)) deallocate(BS_T_grp(iGR)%O_tddft_R_d) +#endif + enddo +end subroutine TDDFT_oscillators_free_R diff --git a/src/bse/DOUBLE_project.dep b/src/bse/DOUBLE_project.dep index bb0a01a38a..5bfc14f7b2 100644 --- a/src/bse/DOUBLE_project.dep +++ b/src/bse/DOUBLE_project.dep @@ -1,5 +1,5 @@ + BSE_utilities.o EPS_via_perturbative_inversion.o - K.o K_Haydock.o K_Haydock_gather.o K_Haydock_response.o @@ -19,10 +19,13 @@ K_diago_driver.o K_diago_hermitian_residuals.o K_diago_kerr_residual.o + K_diago_left_residuals.o K_diago_magnons_residual.o K_diago_non_hermitian_residuals.o + K_diago_overlap_matrix.o K_diago_perturbative.o K_diago_response_functions.o + K_diago_right_residuals.o K_diagonal.o K_dipoles.o K_dot_product.o @@ -37,6 +40,7 @@ K_inversion_driver.o K_inversion_engine.o K_inversion_restart.o + K_kernel.o K_multiply_by_V.o K_multiply_by_V_slepc.o K_multiply_by_V_transpose.o diff --git a/src/bse/K_Haydock.F b/src/bse/K_Haydock.F index f9520f4e87..d715448a5f 100644 --- a/src/bse/K_Haydock.F +++ b/src/bse/K_Haydock.F @@ -9,6 +9,7 @@ subroutine K_Haydock(iq,W,OBS,Xk) ! use pars, ONLY:SP,schlen,cI,cZERO,cONE use functions, ONLY:NAN + use R_lattice, ONLY:q0_def_norm use X_m, ONLY:global_gauge,X_OUTPUT_driver use wrapper_omp, ONLY:V_plus_alpha_V_omp use frequency, ONLY:w_samp @@ -228,6 +229,7 @@ subroutine K_Haydock(iq,W,OBS,Xk) do i_ColFG=1,BS_nkFGbz_in_Tgrp(i_g) if (trim(OBS)=="optics") V0(i_g)%fragment(:,i_ColFG) = BS_T_grp(i_g)%dipoles_opt(1,:,1) if (trim(OBS)=="kerr") V0(i_g)%fragment(:,i_ColFG) = BS_T_grp(i_g)%dipoles_opt(2,:,1) + if (trim(OBS)=="dichroism") V0(i_g)%fragment(:,i_ColFG) = BS_T_grp(i_g)%dipoles_dic(1,:,1) if (trim(OBS)=="magnons") V0(i_g)%fragment(:,i_ColFG) = BS_T_grp(i_g)%dipoles_mag(1,:,1) V0(i_g)%fragment(:,i_ColFG) = V0(i_g)%fragment(:,i_ColFG)*sqrt(cmplx(BS_T_grp(i_g)%f_RES,kind=SP)) enddo @@ -266,9 +268,10 @@ subroutine K_Haydock(iq,W,OBS,Xk) if (.not.PAR_IND_T_groups%element_1D(i_g)) cycle do i_ColFG=1,BS_nkFGbz_in_Tgrp(i_g) if (trim(OBS)=="kerr") W0(i_g)%fragment(:,i_ColFG) = BS_T_grp(i_g)%dipoles_opt(1,:,1) + if (trim(OBS)=="dichroism") W0(i_g)%fragment(:,i_ColFG) = BS_T_grp(i_g)%dipoles_opt(1,:,1)/q0_def_norm W0(i_g)%fragment(:,i_ColFG) = W0(i_g)%fragment(:,i_ColFG)*(sqrt(cmplx(BS_T_grp(i_g)%f_RES,kind=SP))) enddo - if(velocity_correction) V_tmp(i_g)%fragment = W0(i_g)%fragment/BS_T_grp(i_g)%E + if(velocity_correction) V_tmp(i_g)%fragment = W0(i_g)%fragment/BS_T_grp(i_g)%E enddo ! do i_g = 1,BS_nT_grps diff --git a/src/bse/K_Haydock_response.F b/src/bse/K_Haydock_response.F index 03e540fbd9..8d59e1c6a0 100644 --- a/src/bse/K_Haydock_response.F +++ b/src/bse/K_Haydock_response.F @@ -7,9 +7,10 @@ ! subroutine K_Haydock_response(iq,it,Cf_size,OBS,scheme,W,Af,Bf,Cf,reached_threshold,FG_factor) ! - use pars, ONLY:SP,cZERO,cONE,pi + use pars, ONLY:SP,cZERO,cONE,cI,pi + use units, ONLY:SPEED_OF_LIGHT use X_m, ONLY:Resp_ii,Joint_DOS,global_gauge, & -& X_magnons,Resp_ij,i_G_shift +& X_magnons,Resp_ij,X_dichroism,i_G_shift use R_lattice, ONLY:bare_qpg use frequency, ONLY:w_samp use BS, ONLY:BSE_mode @@ -223,6 +224,8 @@ subroutine K_Haydock_response(iq,it,Cf_size,OBS,scheme,W,Af,Bf,Cf,reached_thresh Resp_ij(iw,i_resp(:))= -X_o(1:2,1)*Co case("magnons") X_magnons(iw,1,i_resp(:))=-X_o(1:2,1)*Co/(4._SP*pi) + case("dichroism") + X_dichroism(iw,i_resp(:))=-X_o(1:2,1)*Co/(4._SP*pi)*(cI/SPEED_OF_LIGHT) end select ! enddo @@ -242,6 +245,9 @@ subroutine K_Haydock_response(iq,it,Cf_size,OBS,scheme,W,Af,Bf,Cf,reached_thresh case("magnons") sums=(X_magnons(iw,1,2)+X_magnons(iw,1,4))/2._SP diff= X_magnons(iw,1,2)-X_magnons(iw,1,4) + case("dichroism") + sums=(X_dichroism(iw,2)+X_dichroism(iw,4))/2._SP + diff= X_dichroism(iw,2)-X_dichroism(iw,4) end select ! update_thresh = abs(real(sums))>0._SP .and. abs(aimag(sums))>0._SP diff --git a/src/bse/K_IP.F b/src/bse/K_IP.F index e7ce35f566..1430298f37 100644 --- a/src/bse/K_IP.F +++ b/src/bse/K_IP.F @@ -21,8 +21,8 @@ subroutine K_IP(iq,Ken,Xk,Dip,W_bss) & skip_cond_sum_rule,X_magnons,Resp_ij,X_dichroism use BS_solvers, ONLY:Co_factor,diam_term_exact,para_term_w0,BSS_Wd,BSS_mode,BSS_uses_DbGd,FG_factor use BS, ONLY:BS_T_grp,BS_nkFGbz_in_Tgrp,BS_nT_grps,BS_perturbative_SOC,& - & BS_dip_size,l_BS_jdos,l_BS_esort,l_BS_trace,BS_H_dim,& - & l_BS_abs,l_BS_kerr,l_BS_magnons,l_BS_dichroism,l_BS_photolum,& +& BS_dip_size,l_BS_jdos,l_BS_esort,l_BS_trace,BS_H_dim,l_BS_kerr_asymm,& +& l_BS_abs,l_BS_kerr,l_BS_magnons,l_BS_dichroism,l_BS_photolum,& & BS_K_anti_resonant,l_BS_ares_from_res use parallel_int, ONLY:PP_redux_wait use parallel_m, ONLY:PAR_BS_nT_col_grps,PAR_COM_eh_INDEX,PAR_IND_T_groups,& @@ -43,7 +43,7 @@ subroutine K_IP(iq,Ken,Xk,Dip,W_bss) ! ! Work space ! - integer ::iv,ic,i_T_g,i_T,n_SOC,i_pert_SOC,i_res_ares,i_sort,nVAR,i_dip + integer ::i_T_g,i_T,n_SOC,i_pert_SOC,i_res_ares,i_sort,nVAR,Indexes(5) real(SP) ::f_eh,f_eh_PL,res_PL logical ::dip_v,dip_r complex(SP) ::res_abs,E_plus_W0,E_plus_W(2/n_sp_pol),Z_eh,E_eh,g_fac @@ -52,6 +52,7 @@ subroutine K_IP(iq,Ken,Xk,Dip,W_bss) complex(SP), allocatable :: E_DbGd(:) ! complex(SP) ::P_weighted(3),res_kerr,res_magn(2),res_dich + integer, allocatable :: vtmpI(:,:) complex(SP), allocatable :: vtmpC(:,:) ! if (l_bs_fxc) return @@ -97,13 +98,15 @@ subroutine K_IP(iq,Ken,Xk,Dip,W_bss) ! Sorting pre-setup !------------------- if(l_BS_esort) then - N_BS_E_sorted=2 ! Energies (IP + degeneracy) + N_BS_E_sorted=2 ! Energies (IP + degeneracy) if(l_BS_abs) n_BS_E_sorted=n_BS_E_sorted+1 if(l_BS_kerr) n_BS_E_sorted=n_BS_E_sorted+1 if(l_BS_dichroism) n_BS_E_sorted=n_BS_E_sorted+1 if(l_BS_magnons) n_BS_E_sorted=n_BS_E_sorted+n_spinor YAMBO_ALLOC(vtmpC,(BS_H_dim,n_BS_E_sorted)) vtmpC=cZERO + YAMBO_ALLOC(vtmpI,(BS_H_dim,5)) + vtmpI=0 endif ! if (BSS_uses_DbGd .and. (index(BSS_mode,'h')/=0)) then @@ -128,6 +131,7 @@ subroutine K_IP(iq,Ken,Xk,Dip,W_bss) ! Energy !======== ! + Indexes=BS_T_grp(i_T_g)%table(i_T,:) E_plus_W0=BS_T_grp(i_T_g)%E(i_T,1) if(allocated(BS_T_grp(i_T_g)%W)) then E_plus_W0=BS_T_grp(i_T_g)%E(i_T,1)-cI*BS_T_grp(i_T_g)%W(i_T) @@ -147,7 +151,7 @@ subroutine K_IP(iq,Ken,Xk,Dip,W_bss) enddo ! if (l_BS_esort.and.i_res_ares==1) then - call K_IP_sort("add",i_sort,nVAR,E_plus_W0,vtmpC) + call K_IP_sort("add",i_sort,nVAR,Indexes,vtmpI,E_plus_W0,vtmpC) nVAR=nVAR+1 endif ! @@ -163,21 +167,28 @@ subroutine K_IP(iq,Ken,Xk,Dip,W_bss) ! Residuals DIP_cv * DIP_vc ! if(l_BS_abs) then - res_abs=abs(BS_T_grp(i_T_g)%dipoles_opt(1,i_T,1))**2 + res_abs=BS_T_grp(i_T_g)%dipoles_opt(1,i_T,1)*conjg(BS_T_grp(i_T_g)%dipoles_opt(1,i_T,1)) + if(l_BS_kerr_asymm) then + res_abs=(BS_T_grp(i_T_g)%dipoles_opt(1,i_T,1)*conjg(BS_T_grp(i_T_g)%dipoles_opt(1,i_T,1))+ & + & BS_T_grp(i_T_g)%dipoles_opt(2,i_T,1)*conjg(BS_T_grp(i_T_g)%dipoles_opt(2,i_T,1)))/2._SP + endif if(l_BS_trace) then - do i_dip=2,BS_dip_size - res_abs=res_abs+abs(BS_T_grp(i_T_g)%dipoles_opt(i_dip,i_T,1))**2 - enddo - res_abs=res_abs/real(BS_dip_size,SP) + res_abs=(BS_T_grp(i_T_g)%dipoles_opt(1,i_T,1)*conjg(BS_T_grp(i_T_g)%dipoles_opt(1,i_T,1))+ & + & BS_T_grp(i_T_g)%dipoles_opt(2,i_T,1)*conjg(BS_T_grp(i_T_g)%dipoles_opt(2,i_T,1))+ & + & BS_T_grp(i_T_g)%dipoles_opt(3,i_T,1)*conjg(BS_T_grp(i_T_g)%dipoles_opt(3,i_T,1)))/3._SP endif res_abs=res_abs*f_eh*Z_eh*E_eh - if (l_BS_esort.and.i_res_ares==1) call K_IP_sort("add",i_sort,nVAR,res_abs,vtmpC) + if (l_BS_esort.and.i_res_ares==1) call K_IP_sort("add",i_sort,nVAR,Indexes,vtmpI,res_abs,vtmpC) endif ! if (l_BS_kerr) then res_kerr=BS_T_grp(i_T_g)%dipoles_opt(1,i_T,1)*conjg(BS_T_grp(i_T_g)%dipoles_opt(2,i_T,1)) + if(l_BS_kerr_asymm) then + res_kerr=(BS_T_grp(i_T_g)%dipoles_opt(1,i_T,1)*conjg(BS_T_grp(i_T_g)%dipoles_opt(2,i_T,1))- & + & BS_T_grp(i_T_g)%dipoles_opt(2,i_T,1)*conjg(BS_T_grp(i_T_g)%dipoles_opt(1,i_T,1)))/2._SP + endif res_kerr=res_kerr*Z_eh*E_eh*f_eh - if (l_BS_esort.and.i_res_ares==1) call K_IP_sort("add",i_sort,nVAR,res_kerr,vtmpC) + if (l_BS_esort.and.i_res_ares==1) call K_IP_sort("add",i_sort,nVAR,Indexes,vtmpI,res_kerr,vtmpC) endif ! if (l_BS_photolum) then @@ -192,16 +203,16 @@ subroutine K_IP(iq,Ken,Xk,Dip,W_bss) ! Dichroism ! if(l_BS_dichroism) then - res_dich=BS_T_grp(i_T_g)%dipoles_opt(1,i_T,1)*conjg(BS_T_grp(i_T_g)%dipoles_dic(1,i_T,1)) + res_dich=(BS_T_grp(i_T_g)%dipoles_opt(1,i_T,1)*conjg(BS_T_grp(i_T_g)%dipoles_dic(1,i_T,1))+ & + & BS_T_grp(i_T_g)%dipoles_opt(2,i_T,1)*conjg(BS_T_grp(i_T_g)%dipoles_dic(2,i_T,1)))/2._SP if(l_BS_trace) then - do i_dip=2,BS_dip_size - ! x*L = DIP_x*DIP_orbital; - res_dich=res_dich+BS_T_grp(i_T_g)%dipoles_opt(i_dip,i_T,1)*conjg(BS_T_grp(i_T_g)%dipoles_dic(i_dip,i_T,1)) - enddo - res_dich=res_dich/real(BS_dip_size,SP) + ! x*L = DIP_x*DIP_orbital; + res_dich=(BS_T_grp(i_T_g)%dipoles_opt(1,i_T,1)*conjg(BS_T_grp(i_T_g)%dipoles_dic(1,i_T,1))+ & + & BS_T_grp(i_T_g)%dipoles_opt(2,i_T,1)*conjg(BS_T_grp(i_T_g)%dipoles_dic(2,i_T,1))+ & + & BS_T_grp(i_T_g)%dipoles_opt(3,i_T,1)*conjg(BS_T_grp(i_T_g)%dipoles_dic(3,i_T,1)))/3._SP endif res_dich=res_dich*f_eh*Z_eh/(q0_def_norm) - if (l_BS_esort.and.i_res_ares==1) call K_IP_sort("add",i_sort,nVAR,res_dich,vtmpC) + if (l_BS_esort.and.i_res_ares==1) call K_IP_sort("add",i_sort,nVAR,Indexes,vtmpI,res_dich,vtmpC) endif ! ! Magnons @@ -213,8 +224,8 @@ subroutine K_IP(iq,Ken,Xk,Dip,W_bss) res_magn(:)=BS_T_grp(i_T_g)%dipoles_mag(:,i_T,1)*conjg(BS_T_grp(i_T_g)%dipoles_mag(:,i_T,1)) res_magn=res_magn*f_eh*Z_eh if (l_BS_esort.and.i_res_ares==1) then - call K_IP_sort("add",i_sort,nVAR,res_magn(1),vtmpC) - if(n_spinor==2) call K_IP_sort("add",i_sort,nVAR,res_magn(2),vtmpC) + call K_IP_sort("add",i_sort,nVAR,Indexes,vtmpI,res_magn(1),vtmpC) + if(n_spinor==2) call K_IP_sort("add",i_sort,nVAR,Indexes,vtmpI,res_magn(2),vtmpC) endif endif ! @@ -305,7 +316,7 @@ subroutine K_IP(iq,Ken,Xk,Dip,W_bss) ! ! Sorting procedure !=================== - if (l_BS_esort) call K_IP_sort("sort",0,nVAR,cZERO,vtmpC) + if (l_BS_esort) call K_IP_sort("sort",0,nVAR,(/0,0,0,0,0/),vtmpI,cZERO,vtmpC) ! ! ALL 2 ALL !============== @@ -372,7 +383,7 @@ subroutine K_IP(iq,Ken,Xk,Dip,W_bss) if (l_BS_magnons) X_magnons(:,:,3)=X_magnons(:,:,3)*Co_factor/(4._SP*pi) ! if ( l_rpa_IP) call K_OUTPUT(iq,W_bss,"open dump IP close reset","optics kerr magnons PL dichroism") - call K_OUTPUT(iq,W_bss,"open dump close reset bare","E_IP") + call K_OUTPUT(iq,W_bss,"open dump close reset bare","E_IP E_INDX_IP") ! ! Clean ! diff --git a/src/bse/K_IP_sort.F b/src/bse/K_IP_sort.F index 9c72fbd310..38a6068617 100644 --- a/src/bse/K_IP_sort.F +++ b/src/bse/K_IP_sort.F @@ -5,12 +5,12 @@ ! ! Authors (see AUTHORS file for details): DS AM ! -subroutine K_IP_sort(what,I,N,El,vC) +subroutine K_IP_sort(what,I,N,Indexes,vI,El,vC) ! use pars, ONLY:SP,cZERO use parallel_m, ONLY:PAR_COM_eh_INDEX,PAR_COM_Xk_ibz_INDEX - use X_m, ONLY:BS_E_sorted,N_BS_E_sorted - use BS, ONLY:BS_H_dim + use X_m, ONLY:BS_E_sorted,N_BS_E_sorted,BS_E_sorted_indx + use BS, ONLY:BS_H_dim,l_BS_esort_indx use vec_operate, ONLY:sort,degeneration_finder use units, ONLY:HA2EV use parallel_int, ONLY:PP_redux_wait @@ -18,11 +18,12 @@ subroutine K_IP_sort(what,I,N,El,vC) #include ! character(*) ::what - integer ::I,N + integer ::I,N,Indexes(5),vI(BS_H_dim,5) complex(SP) ::El,vC(BS_H_dim,N_BS_E_sorted) ! ! Work space ! + logical :: l_print_warning integer :: EDIM,Ntmp,i_VAR,i_cv,i_grp,n_deg_grp real(SP), allocatable :: vR(:) integer, allocatable :: sort_indx(:),first_el(:),n_of_el(:) @@ -30,11 +31,15 @@ subroutine K_IP_sort(what,I,N,El,vC) if (what=="add") then N=N+1 vC(I,N)=El + if(N==1) vI(I,:)=Indexes return endif ! + ! call PP_redux_wait(vC,COMM=PAR_COM_Xk_ibz_INDEX%COMM ) + call PP_redux_wait(vI,COMM=PAR_COM_Xk_ibz_INDEX%COMM ) call PP_redux_wait(vC,COMM=PAR_COM_eh_INDEX%COMM ) + call PP_redux_wait(vI,COMM=PAR_COM_eh_INDEX%COMM ) ! YAMBO_ALLOC(vR,(BS_H_dim)) YAMBO_ALLOC(sort_indx,(BS_H_dim)) @@ -52,21 +57,35 @@ subroutine K_IP_sort(what,I,N,El,vC) ! Threshold is 1meV call degeneration_finder(vR(:EDIM),EDIM,first_el,n_of_el,n_deg_grp,0.001_SP/HA2EV,Include_single_values=.TRUE.) ! + l_print_warning=.false. + ! do i_VAR=1,n_BS_E_sorted BS_E_sorted(:,i_VAR,1)=cZERO do i_grp=1,n_deg_grp if(i_grp>EDIM/Ntmp) cycle + ! + if(i_VAR==1.and.l_BS_esort_indx) then + if (n_of_el(i_grp)>18) l_print_warning=.true. + do i_cv=first_el(i_grp),first_el(i_grp)+min(18,n_of_el(i_grp))-1 + BS_E_sorted_indx(i_grp,i_cv-first_el(i_grp)+1,1:5)=vI(sort_indx(i_cv),:) + enddo + endif + ! if(i_VAR==2) then BS_E_sorted(i_grp,i_VAR,1)=real(n_of_el(i_grp),SP) cycle endif + ! do i_cv=first_el(i_grp),first_el(i_grp)+n_of_el(i_grp)-1 BS_E_sorted(i_grp,i_VAR,1)=BS_E_sorted(i_grp,i_VAR,1)+vC(sort_indx(i_cv),i_VAR) enddo - BS_E_sorted(i_grp,i_VAR,1)=BS_E_sorted(i_grp,i_VAR,1)/real(n_of_el(i_grp),SP) + if(i_VAR==1) BS_E_sorted(i_grp,i_VAR,1)=BS_E_sorted(i_grp,i_VAR,1)/real(n_of_el(i_grp),SP) enddo + ! enddo ! + if (l_print_warning) call warning("More than 18 degenerate transitions detected. Printing only first 18") + ! ! Free deallocate(first_el) deallocate(n_of_el) diff --git a/src/bse/K_Transitions_setup.F b/src/bse/K_Transitions_setup.F index 8def415263..b394f0688a 100644 --- a/src/bse/K_Transitions_setup.F +++ b/src/bse/K_Transitions_setup.F @@ -5,18 +5,8 @@ ! ! Authors (see AUTHORS file for details): AM DS IMA ! -subroutine K_Transitions_setup(mode,iq,Ken,Xk,Dip) +subroutine K_Transitions_setup(iq,Ken,Xk,Dip,V_bands,C_bands) ! - ! mode=K - ! ====== - ! Out: - ! - ! BS_K_dim(1) - ! BS_H_dim - ! BS_nT_at_k - ! - ! mode=T - ! ====== ! Filling of Transition groups ! !type BS_T_group @@ -33,106 +23,83 @@ subroutine K_Transitions_setup(mode,iq,Ken,Xk,Dip) use pars, ONLY:SP use stderr, ONLY:intc use BS_solvers, ONLY:BSS_mode,BSS_uses_DbGd - ! mode ="T" use BS, ONLY:T_group,BS_bands,BS_T_grp,BS_nT_grps,BS_n_eh_spaces,& & BS_Blocks_and_Transitions_alloc,BS_perturbative_SOC,& & BS_nkFGbz_in_Tgrp - ! mode ="K" - use BS, ONLY:BS_K_dim,BS_H_dim,BS_nT_at_k,BS_K_coupling, & -& l_BS_ares_from_res,BS_res_ares_n_mat use DIPOLES, ONLY:DIPOLE_t use parallel_m, ONLY:PAR_IND_WF_b_and_k,PAR_IND_T_ordered,PAR_IND_T_groups,& & PAR_IND_T_Haydock,PP_indexes_reset - use electrons, ONLY:levels,n_sp_pol,n_full_bands,n_met_bands + use electrons, ONLY:levels,n_sp_pol use com, ONLY:msg use R_lattice, ONLY:qindx_X,bz_samp use BS, ONLY:l_BS_magnons,l_BS_photolum #if defined _RT + use electrons, ONLY:n_full_bands,n_met_bands use drivers, ONLY:l_rt_carriers_in_use use RT_control, ONLY:EQ_Transitions,EQ_NoOcc #endif ! #include ! - character(1) :: mode ! "K" for k-resolved, "T" for transitions groups - type(levels) :: Ken - type(bz_samp) :: Xk - type(DIPOLE_t) :: Dip - integer :: iq + type(levels), intent(in) :: Ken + type(bz_samp), intent(in) :: Xk + type(DIPOLE_t), intent(in) :: Dip + integer, intent(in) :: iq + integer, intent(in) :: V_bands(2),C_bands(2) ! ! Work Space ! real(SP) :: E_eh,E_eh_SOC_corr(2/n_sp_pol),f_eh,f_eh_RES,f_eh_PL - logical :: l_trans_in_group,l_grp_to_grp,l_skip_unoccupied - integer :: ikbz,ikpbz,iv,ic,ik,ikp,i_sp_v,i_sp_c,i_star,ib1,ib2,isp1,isp2, & + logical :: l_trans_in_group,l_grp_to_grp + integer :: ikbz,ikpbz,iv,ic,ik,ikp,i_sp_v,i_sp_c,i_star,ib1,ib2, & & i_T_now,i_T_group_now,i_T_group_last,i_T_group_start,i_g,n_loops,i_loop,i_T_in_the_grp,i_res_ares,& -& BS_nT_at_k_tmp(Xk%nibz*2),V_bands(2),C_bands(2),& & nColFG_in_T_group_now,i_ColFG,ikFGbz,ikFG ! logical, external :: K_Transition_check ! - if (mode=="K") call section("+P",'Transition pre-analysis @q'//trim(intc(iq))) - if (mode=="T") call section("=",'Transition Groups build-up @q'//trim(intc(iq))) + call section("=",'Transition Groups build-up @q'//trim(intc(iq))) ! - call k_build_up_BZ_tables(Xk) + ! BS_nT_grps is computed in PARALLEL_Transition_grouping ! - ! BS_nT_grps is first computed in PARALLEL_Transition_grouping + call msg('r','[BSK] Transition groups (total)',BS_nT_grps) ! - if (mode=="T") then - allocate(BS_T_grp(BS_nT_grps)) - do i_g=1,BS_nT_grps - BS_T_grp(i_g)%size=0 - enddo - n_loops=2 - ! - allocate(BS_nkFGbz_in_Tgrp(BS_nT_grps)) - ! - call PP_indexes_reset(PAR_IND_WF_b_and_k) - YAMBO_ALLOC(PAR_IND_WF_b_and_k%element_2D,(BS_bands(2),Xk%nibz)) - PAR_IND_WF_b_and_k%element_2D=.FALSE. - else - BS_K_dim=0 - BS_nT_at_k_tmp=0 - n_loops=1 - endif + allocate(BS_T_grp(BS_nT_grps)) + do i_g=1,BS_nT_grps + BS_T_grp(i_g)%size=0 + enddo + n_loops=2 ! - l_skip_unoccupied=.true. - V_bands=(/BS_bands(1),maxval(Ken%nbm)/) - C_bands=(/minval(Ken%nbf)+1,BS_bands(2)/) -#if defined _RT - ! Bands range. In the _RT case %nbf and %nbm turn metallic when carriers are used. - if (l_rt_carriers_in_use.and.EQ_Transitions) then - l_skip_unoccupied=.false. - V_bands=(/BS_bands(1),maxval(n_met_bands)/) - C_bands=(/minval(n_full_bands)+1,BS_bands(2)/) - endif - if (EQ_NoOcc) then - l_skip_unoccupied=.false. - V_bands=BS_bands - C_bands=BS_bands - endif -#endif + allocate(BS_nkFGbz_in_Tgrp(BS_nT_grps)) + ! + call PP_indexes_reset(PAR_IND_WF_b_and_k) + YAMBO_ALLOC(PAR_IND_WF_b_and_k%element_2D,(BS_bands(2),Xk%nibz)) + PAR_IND_WF_b_and_k%element_2D=.FALSE. ! do i_loop=1,n_loops ! - if (mode=="T") then - i_T_now=0 - i_T_group_now=0 - i_T_group_last=0 - endif + i_T_now=0 + i_T_group_now=0 + i_T_group_last=0 + ! + nColFG_in_T_group_now=1 ! ! DS: WARNING ! I cannot revert ik and i_sp_pol loop here ... ! do i_res_ares=1,BS_n_eh_spaces ! - if (mode=="T") i_T_group_start=i_T_group_now+1 + i_T_group_start=i_T_group_now+1 + ! + ! Here I cannot invert the ik and the i_sp loop since + ! BS_T_grp and BS_nT_at_k are defined according to kpts while they + ! should be defined according to both i_sp_pol and kpts ! do ik=1,Xk%nibz ! - ! Here I cannot invert the ik and the i_sp loop since - ! BS_T_grp and BS_nT_at_k are defined according to kpts while they - ! should be defined according to both i_sp_pol and kpts + if (BSS_uses_DbGd .and. (index(BSS_mode,'h')/=0)) then + nColFG_in_T_group_now = maxval(Xk%FGbz%N_mapped(& +& Xk%k_table(ik,Xk%star(ik,:Xk%nstar(ik))))) + end if ! ! Moreover I cannot move iv,ic loops insiede i_sp_pol and i_star loops ! Otherwise runs with parallelization over eh fail @@ -140,13 +107,6 @@ subroutine K_Transitions_setup(mode,iq,Ken,Xk,Dip) ! of the algorithm with the qindx_B distributed in memory very slow ! since i_k_bz and i_p_bz change at each iteration of the loop ! - if (BSS_uses_DbGd .and. (index(BSS_mode,'h')/=0)) then - nColFG_in_T_group_now = maxval(Xk%FGbz%N_mapped(& -& Xk%k_table(ik,Xk%star(ik,:Xk%nstar(ik))))) - else - nColFG_in_T_group_now=1 - end if - ! do iv=V_bands(1),V_bands(2) do ic=C_bands(1),C_bands(2) ! @@ -154,16 +114,10 @@ subroutine K_Transitions_setup(mode,iq,Ken,Xk,Dip) i_sp_v=i_sp_c ! if (l_BS_magnons.and.n_sp_pol==2) then - if(i_res_ares==1.and.i_sp_c==1) cycle - if(i_res_ares==2.and.i_sp_c==2) cycle + if(i_sp_c==1) cycle i_sp_v=mod(i_sp_c,n_sp_pol)+1 endif ! - if( l_skip_unoccupied) then - if( iv>Ken%nbm(i_sp_v) ) cycle - if( ic + ! + type(levels), intent(in) :: Ken + type(bz_samp), intent(in) :: Xk + type(DIPOLE_t), intent(in) :: Dip + integer, intent(in) :: iq + integer, intent(out):: V_bands(2),C_bands(2) + ! + ! Work Space + ! + real(SP) :: E_eh,E_eh_SOC_corr(2/n_sp_pol),f_eh,f_eh_RES,f_eh_PL + integer :: ikbz,ikpbz,iv,ic,ik,ikp,i_sp_v,i_sp_c,i_star,ib1,ib2, & +& i_res_ares,BS_nT_at_k_tmp(Xk%nibz*2) + ! + logical, external :: K_Transition_check + ! + ! + call section("+P",'Transition pre-analysis @q'//trim(intc(iq))) + ! + call k_build_up_BZ_tables(Xk) + ! + BS_K_dim=0 + BS_nT_at_k_tmp=0 ! - if (mode=="T") then - call msg('r','[BSK] Transition groups (total)',BS_nT_grps) - return + V_bands=(/BS_bands(1),maxval(Ken%nbm)/) + C_bands=(/minval(Ken%nbf)+1,BS_bands(2)/) +#if defined _RT + ! Bands range. In the _RT case %nbf and %nbm turn metallic when carriers are used. + if (l_rt_carriers_in_use.and.EQ_Transitions) then + V_bands=(/BS_bands(1),maxval(n_met_bands)/) + C_bands=(/minval(n_full_bands)+1,BS_bands(2)/) + endif + if (EQ_NoOcc) then + V_bands=BS_bands + C_bands=BS_bands endif +#endif + ! + do i_res_ares=1,BS_n_eh_spaces + ! + do i_sp_c=1,n_sp_pol + ! + i_sp_v=i_sp_c + ! + if (l_BS_magnons.and.n_sp_pol==2) then + if(i_sp_c==1) cycle + i_sp_v=mod(i_sp_c,n_sp_pol)+1 + endif + ! + do ik=1,Xk%nibz + do i_star=1,Xk%nstar(ik) + ! + ikbz=Xk%k_table(ik,Xk%star(ik,i_star)) + ikpbz=qindx_X(iq,ikbz,1) + ikp =Xk%sstar(ikpbz,1) + ! + do iv=V_bands(1),V_bands(2) + do ic=C_bands(1),C_bands(2) + ! + if (i_res_ares==1) then; ib1=iv; ib2=ic; endif + if (i_res_ares==2) then; ib1=ic; ib2=iv; endif + ! + if (.not.K_Transition_check(Ken,Dip,iq,ik,ikp,ib1,ib2,i_sp_c,i_res_ares,& + & E_eh,E_eh_SOC_corr,f_eh,f_eh_RES,f_eh_PL)) cycle + ! + BS_K_dim(i_res_ares)=BS_K_dim(i_res_ares)+1 + BS_nT_at_k_tmp(ik+(i_res_ares-1)*Xk%nibz)=BS_nT_at_k_tmp(ik+(i_res_ares-1)*Xk%nibz)+1 + ! + enddo ! conduction + enddo ! valence + ! + enddo ! symmetries + enddo ! k (ibz) + enddo ! spin + ! + enddo ! i_res_ares ! YAMBO_ALLOC(BS_nT_at_k,(Xk%nibz*BS_n_eh_spaces)) BS_nT_at_k=BS_nT_at_k_tmp(:Xk%nibz*BS_n_eh_spaces) @@ -299,4 +336,4 @@ subroutine K_Transitions_setup(mode,iq,Ken,Xk,Dip) if (BS_res_ares_n_mat==2.or.BS_K_coupling) BS_H_dim=BS_K_dim(1)+BS_K_dim(2) endif ! -end subroutine +end subroutine K_Transitions_dimensions diff --git a/src/bse/K_components_folded_in_serial_arrays.F b/src/bse/K_components_folded_in_serial_arrays.F index 0e2ab8c321..47cd16f8c4 100644 --- a/src/bse/K_components_folded_in_serial_arrays.F +++ b/src/bse/K_components_folded_in_serial_arrays.F @@ -50,7 +50,7 @@ subroutine K_components_folded_in_serial_arrays(iq) call BSS_DICH_alloc( ) call BSS_PL_alloc( ) ! - if (BS_nT_grps>0) call live_timing("Folding BSE Kernel",PAR_IND_T_groups%n_of_elements(PAR_COM_eh_INDEX%CPU_id+1)) + if (BS_nT_grps>0) call live_timing("Unfolding BSE components",PAR_IND_T_groups%n_of_elements(PAR_COM_eh_INDEX%CPU_id+1)) ! BS_n_eh_spaces_solver=1 if(BS_n_eh_spaces==1.and.BS_K_coupling) BS_n_eh_spaces_solver=2 @@ -106,7 +106,7 @@ subroutine K_components_folded_in_serial_arrays(iq) ! if (l_BS_magnons) BSS_dipoles_magn(:,H_pos)=BS_T_grp(i_g)%dipoles_mag(:,i_Tr,i_res_ares_solver) ! - if (l_BS_dichroism) BSS_dipoles_dich(:,H_pos)=BS_T_grp(i_g)%dipoles_dic(:,i_c,i_res_ares_solver) + if (l_BS_dichroism) BSS_dipoles_dich(:,H_pos)=BS_T_grp(i_g)%dipoles_dic(:,i_Tr,i_res_ares_solver) ! if (l_BS_photolum) BSS_dipoles_PL(:,H_pos)=BS_T_grp(i_g)%dipoles_opt(:,i_Tr,i_res_ares_solver)*PL_weights(:) ! diff --git a/src/bse/K_correlation_collisions_dir.F b/src/bse/K_correlation_collisions_dir.F index 2024ae7980..d004339810 100644 --- a/src/bse/K_correlation_collisions_dir.F +++ b/src/bse/K_correlation_collisions_dir.F @@ -14,7 +14,7 @@ subroutine K_correlation_collisions_dir(iq,i_block,qindx_ID_frag,Xk,q) use D_lattice, ONLY:nsym use R_lattice, ONLY:qindx_X,bz_samp use BS, ONLY:O_ng,BS_bands,BS_res_K_corr,BS_blk,& -& BS_oscillators_alloc,K_CORR_collision,BS_T_grp,& +& K_CORR_collision,BS_T_grp,& & BS_K_dim,l_BS_ares_from_res use electrons, ONLY:n_sp_pol use collision_el, ONLY:elemental_collision_free,elemental_collision_alloc @@ -131,11 +131,11 @@ subroutine K_correlation_collisions_dir(iq,i_block,qindx_ID_frag,Xk,q) ! ! v->v', i.e. k-q -> p-q=k-q_W-q transitions ! - call EVAL_table(i_block,i_kmq_t,i_pmq_t,i_v_k,i_v_p,i_k_sp_pol_v,i_kmq_s,i_pmq_s) + call SET_table(i_block,i_kmq_t,i_pmq_t,i_v_k,i_v_p,i_k_sp_pol_v,i_kmq_s,i_pmq_s) ! ! c->c', i.e. k -> p=k-q_w transitions ! - call EVAL_table(i_block,1,1,i_c_k,i_c_p,i_k_sp_pol_c,i_k_s,i_p_s) + call SET_table(i_block,1,1,i_c_k,i_c_p,i_k_sp_pol_c,i_k_s,i_p_s) ! enddo enddo @@ -164,18 +164,18 @@ subroutine K_correlation_collisions_dir(iq,i_block,qindx_ID_frag,Xk,q) ! ! v->c', i.e. k-q -> p-q=k-q_W-q transitions ! - call EVAL_table(i_block,i_kmq_t,i_pmq_t,i_v_k,i_c_p,i_k_sp_pol_v,i_kmq_s,i_pmq_s) + call SET_table(i_block,i_kmq_t,i_pmq_t,i_v_k,i_c_p,i_k_sp_pol_v,i_kmq_s,i_pmq_s) ! ! c->v', i.e. k -> p=k-q_W transitions ! - call EVAL_table(i_block,1,1,i_c_k,i_v_p,i_k_sp_pol_c,i_k_s,i_p_s) + call SET_table(i_block,1,1,i_c_k,i_v_p,i_k_sp_pol_c,i_k_s,i_p_s) ! enddo enddo ! end select ! - call BS_oscillators_alloc(.FALSE.,0,.TRUE.,i_block) + call BS_correlation_oscillators_alloc(i_block) ! #if !defined _CUDA !$omp parallel default(shared), private(i_Tk,i_Tp, & @@ -375,12 +375,11 @@ subroutine fill_indexes(j_Tk,j_Tp,j_Tgrp_k,j_Tgrp_p,j_block,j_q, & ! end subroutine fill_indexes ! - subroutine EVAL_table(iB,iki,iko,ibi,ibo,i_sp_pol,i_s_ki,i_s_ko) + subroutine SET_table(iB,iki,iko,ibi,ibo,i_sp_pol,i_s_ki,i_s_ko) implicit none integer, intent(in) :: iB,iki,iko,ibi,ibo,i_sp_pol,i_s_ki,i_s_ko ! if( O_todo_table(i_s_ki,i_s_ko,iki,iko,ibi,ibo,i_sp_pol) ) return - ! BS_blk(iB)%N_oscillators=BS_blk(iB)%N_oscillators+1 BS_blk(iB)%O_table(i_s_ki,i_s_ko,iki,iko,ibi,ibo,i_sp_pol)=BS_blk(iB)%N_oscillators O_todo_table(i_s_ki,i_s_ko,iki,iko,ibi,ibo,i_sp_pol)=.true. diff --git a/src/bse/K_correlation_collisions_std.F b/src/bse/K_correlation_collisions_std.F index 3c7fefcc5d..62a0d0eb28 100644 --- a/src/bse/K_correlation_collisions_std.F +++ b/src/bse/K_correlation_collisions_std.F @@ -36,7 +36,7 @@ subroutine K_correlation_collisions_std(iq,i_block,qindx_ID_frag,Xk,q) use D_lattice, ONLY:nsym use R_lattice, ONLY:qindx_X,bz_samp use BS, ONLY:O_ng,BS_bands,BS_res_K_corr,BS_blk,& -& BS_oscillators_alloc,K_CORR_collision,BS_T_grp,& +& K_CORR_collision,BS_T_grp,& & BS_K_dim,l_BS_ares_from_res use electrons, ONLY:n_sp_pol use collision_el, ONLY:elemental_collision_free,elemental_collision_alloc @@ -66,7 +66,7 @@ subroutine K_correlation_collisions_std(iq,i_block,qindx_ID_frag,Xk,q) ! logical, allocatable :: O_todo_table(:,:,:,:,:,:) ! - if (.not.BS_res_K_corr) return + if (.not.BS_res_K_corr) return ! call timing('T_space CORR Osc.',OPR='start') ! @@ -201,7 +201,7 @@ subroutine K_correlation_collisions_std(iq,i_block,qindx_ID_frag,Xk,q) ! end select ! - call BS_oscillators_alloc(.FALSE.,0,.TRUE.,i_block) + call BS_correlation_oscillators_alloc(i_block) ! #if !defined _CUDA !$omp parallel default(shared), private(i_Tk,i_Tp, & diff --git a/src/bse/K_correlation_kernel_dir.F b/src/bse/K_correlation_kernel_dir.F index 6e974bfc2d..ff6f1e8ce0 100644 --- a/src/bse/K_correlation_kernel_dir.F +++ b/src/bse/K_correlation_kernel_dir.F @@ -40,8 +40,8 @@ function K_correlation_kernel_dir(i_block,i_p,i_pmq, return endif ! - iO1 = BS_blk(i_block)%O_table(i_k_s,i_p_s,1,1,i_n_k,i_n_p,i_k_sp_pol_n) - iO2 = BS_blk(i_block)%O_table(i_kmq_s,i_pmq_s,i_kmq_t,i_pmq_t,i_m_k,i_m_p,i_k_sp_pol_m) + iO1 = BS_blk(i_block)%O_table(i_k_s,i_p_s,1,1,i_n_k,i_n_p,i_k_sp_pol_n) + iO2 = BS_blk(i_block)%O_table(i_kmq_s,i_pmq_s,i_kmq_t,i_pmq_t,i_m_k,i_m_p,i_k_sp_pol_m) ! #ifdef _CUDA ! diff --git a/src/bse/K_diago_driver.F b/src/bse/K_diago_driver.F index 08cbac35e5..a4e43858da 100644 --- a/src/bse/K_diago_driver.F +++ b/src/bse/K_diago_driver.F @@ -24,16 +24,17 @@ subroutine K_diago_driver(iq,W,X_static) use pars, ONLY:SP use stderr, ONLY:intc use BS, ONLY:BS_K_coupling,BS_H_dim,BS_K_dim,BS_perturbative_SOC,& -& BS_res_ares_n_mat,l_BS_optics !,BS_Blocks_symmetrize_K,& +& BS_res_ares_n_mat,l_BS_optics,l_BS_dichroism !,BS_Blocks_symmetrize_K,& use BS_solvers, ONLY:io_BSS_diago,BSS_perturbative_width,BSS_mode,& & BSS_write_eig_2_db,BSS_eh_W,BS_mat,BSS_n_eig,BS_E use X_m, ONLY:X_t,X_OUTPUT_driver - use electrons, ONLY:n_sp_pol + use electrons, ONLY:n_sp_pol,n_spinor use frequency, ONLY:w_samp use com, ONLY:isec use LIVE_t, ONLY:live_timing use linear_algebra, ONLY:DIAGO - use interfaces, ONLY:K_diago_response_functions,LINEAR_ALGEBRA_driver,PL_diago_residual + use interfaces, ONLY:K_diago_response_functions,LINEAR_ALGEBRA_driver,& + & K_diago_left_residuals,K_diago_right_residuals,PL_diago_residual use IO_int, ONLY:io_control use IO_m, ONLY:REP,VERIFY,OP_WR_CL,OP_RD_CL use parser_m, ONLY:parser @@ -60,7 +61,7 @@ subroutine K_diago_driver(iq,W,X_static) ! Work Space ! integer :: i_BS_mat,Input_n_eig - logical :: K_is_not_hermitian,l_diago,l_slepc,l_target_energy + logical :: K_is_not_hermitian,l_diago,l_slepc,l_target_energy,l_dip_more character(1) :: sec character(5) :: Solver ! @@ -84,12 +85,14 @@ subroutine K_diago_driver(iq,W,X_static) ! ! Residuals, different for each response function !================================================= - complex(SP),pointer :: BS_R_left(:) => null() - complex(SP),pointer :: BS_R_right(:) => null() + complex(SP),pointer :: BS_R_left_abs(:) => null() + complex(SP),pointer :: BS_R_right_abs(:) => null() ! real(SP), pointer :: BS_R_PL(:,:) => null() ! + complex(SP),pointer :: BS_R_left_kerr(:) => null() complex(SP),pointer :: BS_R_right_kerr(:) => null() + complex(SP),pointer :: BS_R_right_dich(:,:) => null() ! complex(SP),pointer :: BS_R_left_magn(:,:) => null() complex(SP),pointer :: BS_R_right_magn(:,:) => null() @@ -102,6 +105,8 @@ subroutine K_diago_driver(iq,W,X_static) #if defined _SLEPC && !defined _NL l_slepc = index(BSS_mode,'s')/=0 #endif + ! + l_dip_more=l_BS_kerr.or.l_BS_dichroism ! if(l_diago) Solver="Diago" #if defined _SLEPC && !defined _NL @@ -145,8 +150,8 @@ subroutine K_diago_driver(iq,W,X_static) ! Diagonalization DB (IN) !======================== call io_control(ACTION=OP_RD_CL,COM=REP,MODE=VERIFY,SEC=(/1,2/),ID=ID) - io_err=io_BSS_diago(iq,i_BS_mat,ID,X_static,BS_E,BS_R_left,BS_R_right,BS_E_SOC_corr,& -& BS_R_left_magn,BS_R_right_magn,BS_R_right_kerr,BS_R_PL) + io_err=io_BSS_diago(iq,i_BS_mat,ID,X_static,BS_E,BS_R_left_abs,BS_R_right_abs,BS_E_SOC_corr,& + & BS_R_left_magn,BS_R_right_magn,BS_R_left_kerr,BS_R_right_kerr,BS_R_right_dich,BS_R_PL) ! K_is_not_hermitian=BS_K_coupling.or.(allocated(BSS_eh_W).and..not.BSS_perturbative_width) ! @@ -230,20 +235,19 @@ subroutine K_diago_driver(iq,W,X_static) if(l_slepc) call MATRIX_slepc(slepc_mat,l_target_energy,BSS_n_eig,BS_mat,E_real=BS_E_real) #endif ! - BS_E=cmplx(BS_E_real,0._SP,SP) + BS_E=cmplx(BS_E_real,0._SP,kind=SP) ! endif ! #if defined _SLEPC && !defined _NL ! - ! BSS_n_eig could be lowered by MATRIX_slepc - if (Input_n_eig>BSS_n_eig) then - call K_slepc_resize_variables() + if (l_slepc) then + ! BSS_n_eig could be lowered by MATRIX_slepc + if (Input_n_eig>BSS_n_eig) call K_slepc_resize_variables() + ! Destroy the matrix + call MatDestroy(slepc_mat,ierr) endif ! - ! Destroy the matrix - !============================ - if(l_slepc) call MatDestroy(slepc_mat,ierr) #endif ! if(l_diago) call live_timing( ) @@ -251,20 +255,59 @@ subroutine K_diago_driver(iq,W,X_static) ! Construct the residuals of epsilon !==================================== if (K_is_not_hermitian) then + ! + ! Right residuals + if(l_BS_optics.or.l_BS_kerr.or.l_BS_dichroism) then + call K_diago_right_residuals("opt1",i_BS_mat,BS_E,BS_R_right_abs, BS_V_right) + if(l_dip_more) call K_diago_right_residuals("opt2",i_BS_mat,BS_E,BS_R_right_kerr,BS_V_right) + endif + if(l_BS_dichroism) then + call K_diago_right_residuals("dic1",i_BS_mat,BS_E,BS_R_right_dich(:,1),BS_V_right) + call K_diago_right_residuals("dic2",i_BS_mat,BS_E,BS_R_right_dich(:,2),BS_V_right) + endif + if(l_BS_magnons) then + call K_diago_right_residuals("mag1",i_BS_mat,BS_E,BS_R_right_magn(1,:),BS_V_right) + if(n_spinor==2) call K_diago_right_residuals("mag2",i_BS_mat,BS_E,BS_R_right_magn(2,:),BS_V_right) + endif ! call K_diago_overlap_matrix(BS_V_left,BS_V_right,BS_overlap) ! - if (l_BS_optics ) call K_diago_non_hermitian_residuals(BS_E,BS_R_left,BS_R_right,BS_V_left,BS_V_right,BS_overlap) - if (l_BS_kerr ) call K_diago_kerr_residual(BS_E,BS_V_right,BS_R_right_kerr) - if (l_BS_magnons) call K_diago_non_herm_magnons_residual(BS_R_left_magn,BS_R_right_magn,BS_V_left,BS_V_right,BS_overlap) - if (l_BS_photolum)call PL_diago_residual(BS_V_left,BS_V_right,BS_R_PL,K_is_not_hermitian,BS_overlap) + ! Left residuals + if(l_BS_optics.or.l_BS_kerr.or.l_BS_dichroism) then + call K_diago_left_residuals("opt1",i_BS_mat,BS_E,BS_R_left_abs, BS_V_left,BS_overlap) + if(l_dip_more) call K_diago_left_residuals("opt2",i_BS_mat,BS_E,BS_R_left_kerr,BS_V_left,BS_overlap) + endif + if(l_BS_magnons) then + call K_diago_left_residuals("mag1",i_BS_mat,BS_E,BS_R_left_magn(1,:),BS_V_left,BS_overlap) + if(n_spinor==2) call K_diago_left_residuals("mag2",i_BS_mat,BS_E,BS_R_left_magn(2,:),BS_V_left,BS_overlap) + endif + ! + ! DS: this also should be changed according to the general structure + if(l_BS_photolum) call PL_diago_residual(BS_V_left,BS_V_right,BS_R_PL,K_is_not_hermitian,BS_overlap) ! else ! - if (l_BS_optics) call K_diago_hermitian_residuals(i_BS_mat,BS_E_real,BS_R_left,BS_R_right) - if (l_BS_kerr ) call K_diago_kerr_residual(BS_E,BS_mat,BS_R_right_kerr) - if (l_BS_magnons) call K_diago_herm_magnons_residual(i_BS_mat,BS_R_left_magn,BS_R_right_magn) - if (l_BS_photolum)call PL_diago_residual(BS_mat,BS_mat,BS_R_PL,K_is_not_hermitian) + ! Right residuals + if(l_BS_optics.or.l_BS_kerr.or.l_BS_dichroism) then + call K_diago_right_residuals("opt1", i_BS_mat,BS_E,BS_R_right_abs, BS_mat) + if(l_dip_more) call K_diago_right_residuals("opt2",i_BS_mat,BS_E,BS_R_right_kerr,BS_mat) + endif + if(l_BS_dichroism) then + call K_diago_right_residuals("dic1",i_BS_mat,BS_E,BS_R_right_dich(:,1),BS_mat) + call K_diago_right_residuals("dic2",i_BS_mat,BS_E,BS_R_right_dich(:,2),BS_mat) + endif + if(l_BS_magnons) then + call K_diago_right_residuals("mag1",i_BS_mat,BS_E,BS_R_right_magn(1,:),BS_mat) + if(n_spinor==2) call K_diago_right_residuals("mag2",i_BS_mat,BS_E,BS_R_right_magn(2,:),BS_mat) + endif + ! + ! Left residuals + if(l_BS_optics.or.l_BS_kerr.or.l_BS_dichroism) BS_R_left_abs =conjg(BS_R_right_abs) + if( l_BS_kerr.or.l_BS_dichroism) BS_R_left_kerr=conjg(BS_R_right_kerr) + if(l_BS_magnons) BS_R_left_magn=conjg(BS_R_right_magn) + ! + ! DS: this also should be changed according to the general structure + if(l_BS_photolum) call PL_diago_residual(BS_mat,BS_mat,BS_R_PL,K_is_not_hermitian) ! endif ! @@ -289,18 +332,18 @@ subroutine K_diago_driver(iq,W,X_static) ! endif ! - ! Now I calculate epsilon - !========================= - call K_diago_response_functions(iq,i_BS_mat,W,BS_E,BS_R_left,BS_R_right,BS_E_SOC_corr,& - & BS_R_left_magn,BS_R_right_magn,BS_R_right_kerr,BS_R_PL) + ! Now I calculate the physical quantities + !========================================= + call K_diago_response_functions(iq,i_BS_mat,W,BS_E,BS_R_left_abs,BS_R_right_abs,BS_E_SOC_corr,& + & BS_R_left_magn,BS_R_right_magn,BS_R_left_kerr,BS_R_right_kerr,BS_R_right_dich,BS_R_PL) ! ! Diagonalization DB (OUT) !========================== if (io_err/=0) then ! call io_control(ACTION=OP_WR_CL,COM=REP,MODE=VERIFY,SEC=(/1,2,3/),ID=ID) - io_err=io_BSS_diago(iq,i_BS_mat,ID,X_static,BS_E,BS_R_left,BS_R_right,BS_E_SOC_corr,& - & BS_R_left_magn,BS_R_right_magn,BS_R_right_kerr,BS_R_PL) + io_err=io_BSS_diago(iq,i_BS_mat,ID,X_static,BS_E,BS_R_left_abs,BS_R_right_abs,BS_E_SOC_corr,& + & BS_R_left_magn,BS_R_right_magn,BS_R_left_kerr,BS_R_right_kerr,BS_R_right_dich,BS_R_PL) ! endif ! @@ -338,12 +381,18 @@ subroutine local_alloc(what) YAMBO_ALLOC_P(BS_E_SOC_corr,(2/n_sp_pol,BSS_n_eig)) endif if (l_BS_optics) then - YAMBO_ALLOC_P(BS_R_left,(BSS_n_eig)) - YAMBO_ALLOC_P(BS_R_right,(BSS_n_eig)) + YAMBO_ALLOC_P(BS_R_left_abs,(BSS_n_eig)) + YAMBO_ALLOC_P(BS_R_right_abs,(BSS_n_eig)) endif if (l_BS_kerr) then YAMBO_ALLOC_P(BS_R_right_kerr,(BSS_n_eig)) endif + if (l_dip_more) then + YAMBO_ALLOC_P(BS_R_left_kerr,(BSS_n_eig)) + endif + if (l_BS_dichroism) then + YAMBO_ALLOC_P(BS_R_right_dich,(BSS_n_eig,2)) + endif if (l_BS_magnons) then YAMBO_ALLOC_P(BS_R_left_magn,(2,BSS_n_eig)) YAMBO_ALLOC_P(BS_R_right_magn,(2,BSS_n_eig)) @@ -386,9 +435,11 @@ subroutine local_free() YAMBO_FREE(BS_V_left) YAMBO_FREE_P(BS_overlap) YAMBO_FREE_P(BS_E_SOC_corr) - YAMBO_FREE_P(BS_R_right) - YAMBO_FREE_P(BS_R_left) + YAMBO_FREE_P(BS_R_right_abs) + YAMBO_FREE_P(BS_R_left_abs) YAMBO_FREE_P(BS_R_right_kerr) + YAMBO_FREE_P(BS_R_left_kerr) + YAMBO_FREE_P(BS_R_right_dich) YAMBO_FREE_P(BS_R_right_magn) YAMBO_FREE_P(BS_R_left_magn) YAMBO_FREE_P(BS_R_PL) @@ -411,10 +462,12 @@ subroutine K_slepc_resize_variables() if(allocated(BS_corrections)) call BSS_resize(BS_corrections) if(associated(BS_E_SOC_corr)) call BSS_resize(BS_E_SOC_corr) ! - if(associated(BS_R_left) ) call BSS_resize(BS_R_left) - if(associated(BS_R_right)) call BSS_resize(BS_R_right) + if(associated(BS_R_left_abs) ) call BSS_resize(BS_R_left_abs) + if(associated(BS_R_right_abs)) call BSS_resize(BS_R_right_abs) ! + if(associated(BS_R_left_kerr) ) call BSS_resize(BS_R_left_kerr) if(associated(BS_R_right_kerr)) call BSS_resize(BS_R_right_kerr) + if(associated(BS_R_right_dich)) call BSS_resize(BS_R_right_dich) if(associated(BS_R_left_magn) ) call BSS_resize(BS_R_left_magn) if(associated(BS_R_right_magn)) call BSS_resize(BS_R_right_magn) if(associated(BS_R_PL) ) call BSS_resize(BS_R_PL) diff --git a/src/bse/K_diago_left_residuals.F b/src/bse/K_diago_left_residuals.F new file mode 100644 index 0000000000..7114d920e4 --- /dev/null +++ b/src/bse/K_diago_left_residuals.F @@ -0,0 +1,113 @@ +! +! License-Identifier: GPL +! +! Copyright (C) 2006 The Yambo Team +! +! Authors (see AUTHORS file for details): AM MG DS +! +subroutine K_diago_left_residuals(mode,i_BS_mat,BS_E,BS_R_left,BS_V_left,BS_overlap) + ! + ! The left eigenvector u(j) of A satisfies + ! u(j)**H * A = lambda(j) * u(j)**H + ! where u(j)**H denotes the conjugate transpose of u(j). + ! + ! Remember: 1/(w-H)= \sum_ij |i right>^{-1) null() + ! + call PP_indexes_reset(px) + call PARALLEL_index(px,(/BSS_n_eig/)) + ! + if (.not.present(BS_overlap)) scheme="hermitian" + if ( present(BS_overlap)) scheme="non-hermitian" + ! + velocity_correction=(index(mode,"opt")>0).and.(trim(global_gauge)=="velocity") + ! + if (.not.BS_K_coupling) res_range=(/1,BS_K_dim(i_BS_mat)/)+BS_K_dim(1)*(i_BS_mat-1) + if ( BS_K_coupling) res_range=(/1,BS_H_dim/) + ! + allocate(tmp_res(res_range(1):res_range(2))) + n_res=res_range(2)-res_range(1)+1 + ! + if (trim(scheme)=="hermitian") BS_R_tmp => BS_R_left + if (trim(scheme)=="non-hermitian") allocate(BS_R_tmp(BSS_n_eig)) + ! + ! [1] BS_R_tmp(i)= + ! = conjg( BS_V_left(k,i))*BSS_dipoles(k)*R_k + BS_R_tmp=cZERO + ! + call live_timing('EPS L residuals',px%n_of_elements(myid+1)) + ! + if(trim(mode)=="opt1") tmp_res=BSS_dipoles_opt(1,res_range(1):res_range(2)) + if(trim(mode)=="opt2") tmp_res=BSS_dipoles_opt(2,res_range(1):res_range(2)) + if(trim(mode)=="dic1") tmp_res=BSS_dipoles_dich(1,res_range(1):res_range(2)) + if(trim(mode)=="dic2") tmp_res=BSS_dipoles_dich(2,res_range(1):res_range(2)) + if(trim(mode)=="mag1") tmp_res=BSS_dipoles_magn(1,res_range(1):res_range(2)) + if(trim(mode)=="mag2") tmp_res=BSS_dipoles_magn(2,res_range(1):res_range(2)) + ! + tmp_res = tmp_res*sqrt(cmplx(BSS_eh_f(res_range(1):res_range(2)),kind=SP)) + if (allocated(BSS_eh_Z)) tmp_res=tmp_res*sqrt(BSS_eh_Z(res_range(1):res_range(2))) + if (velocity_correction) tmp_res=tmp_res/BSS_eh_E(res_range(1):res_range(2)) + ! + do i_lambda=1,BSS_n_eig + ! + if (.not.px%element_1D(i_lambda)) cycle + ! + BS_R_tmp(i_lambda)=Vstar_dot_V_omp(n_res,BS_V_left(:,i_lambda),tmp_res) + if (velocity_correction) BS_R_tmp(i_lambda)=BS_R_tmp(i_lambda)*BS_E(i_lambda) + ! + call live_timing(steps=1) + ! + enddo + call PP_redux_wait(BS_R_tmp) + ! + call live_timing() + ! + if (trim(scheme)=="hermitian") return + ! + ! [2] BS_R_left(i)=BS_overlap(i,j)BS_R_tmp(j) + ! + BS_R_left=cZERO + ! + call live_timing('EPS L x overlap',px%n_of_elements(myid+1)) + do i_lambda=1,BSS_n_eig + if (.not.px%element_1D(i_lambda)) cycle + ! + BS_R_left(i_lambda)=V_dot_V_omp(BSS_n_eig,BS_overlap(i_lambda,:),BS_R_tmp) + ! + call live_timing(steps=1) + enddo + call PP_redux_wait(BS_R_left) + call live_timing + ! + call PP_indexes_reset(px) + ! +end subroutine K_diago_left_residuals diff --git a/src/bse/K_diago_overlap_matrix.F b/src/bse/K_diago_overlap_matrix.F new file mode 100644 index 0000000000..d697371978 --- /dev/null +++ b/src/bse/K_diago_overlap_matrix.F @@ -0,0 +1,51 @@ +! +! Copyright (C) 2000-2020 the YAMBO team +! http://www.yambo-code.org +! +! Authors (see AUTHORS file for details): AM, MG, DS +! +! This file is distributed under the terms of the GNU +! General Public License. You can redistribute it and/or +! modify it under the terms of the GNU General Public +! License as published by the Free Software Foundation; +! either version 2, or (at your option) any later version. +! +! This program is distributed in the hope that it will +! be useful, but WITHOUT ANY WARRANTY; without even the +! implied warranty of MERCHANTABILITY or FITNESS FOR A +! PARTICULAR PURPOSE. See the GNU General Public License +! for more details. +! +! You should have received a copy of the GNU General Public +! License along with this program; if not, write to the Free +! Software Foundation, Inc., 59 Temple Place - Suite 330,Boston, +! MA 02111-1307, USA or visit http://www.gnu.org/copyleft/gpl.txt. +! +subroutine K_diago_overlap_matrix(BS_V_left,BS_V_right,BS_overlap) + ! + use pars, ONLY:SP,cZERO,cONE + use wrapper_omp, ONLY:M_by_M_omp + use interfaces, ONLY:LINEAR_ALGEBRA_driver + use LIVE_t, ONLY:live_timing + use linear_algebra, ONLY:INV + use BS, ONLY:BS_H_dim + use BS_solvers, ONLY:BSS_n_eig + ! + implicit none + ! + complex(SP),intent(in) :: BS_V_left(BS_H_dim,BSS_n_eig),BS_V_right(BS_H_dim,BSS_n_eig) + complex(SP),intent(out) :: BS_overlap(BSS_n_eig,BSS_n_eig) + ! + ! BS_overlap(i,j)=sum_k conjg(BS_V_left(k,i))*BS_V_right(k,j) + ! + BS_overlap=cZERO + ! + call live_timing('BSK overlap mat',1) + ! + call M_by_M_omp('c','n',BSS_n_eig,BSS_n_eig,BS_H_dim,cONE,BS_V_left,BS_H_dim,BS_V_right,BS_H_dim,cZERO,BS_overlap,BSS_n_eig) + ! + call LINEAR_ALGEBRA_driver(INV,M=BS_overlap) + call live_timing(steps=1) + call live_timing() + ! +end subroutine K_diago_overlap_matrix diff --git a/src/bse/K_diago_response_functions.F b/src/bse/K_diago_response_functions.F index aa32c2836c..b12c9fcba2 100644 --- a/src/bse/K_diago_response_functions.F +++ b/src/bse/K_diago_response_functions.F @@ -5,12 +5,13 @@ ! ! Authors (see AUTHORS file for details): DS AM MG ! -subroutine K_diago_response_functions(iq,i_BS_mat,W,BS_E,BS_R_left,BS_R_right,BS_E_SOC_corr,& - & BS_R_left_magn,BS_R_right_magn,BS_R_right_kerr,BS_R_PL) +subroutine K_diago_response_functions(iq,i_BS_mat,W,BS_E,BS_R_left_abs,BS_R_right_abs,BS_E_SOC_corr,& + & BS_R_left_magn,BS_R_right_magn,BS_R_left_kerr,BS_R_right_kerr,BS_R_right_dich,BS_R_PL) ! ! eps2(iw)= 1 - Sum [BS_R_left(i)*BS_R_right(i)] / [w+i*eta - E(i)] ! - use pars, ONLY:cZERO,cONE,SP,pi + use pars, ONLY:cZERO,cONE,cI,SP,pi + use units, ONLY:SPEED_OF_LIGHT use X_m, ONLY:Resp_ii,Joint_DOS,BS_E_sorted,& & global_gauge,i_G_shift use electrons, ONLY:n_sp_pol,n_spinor @@ -21,23 +22,23 @@ subroutine K_diago_response_functions(iq,i_BS_mat,W,BS_E,BS_R_left,BS_R_right,BS use BS, ONLY:BS_perturbative_SOC,BS_res_ares_n_mat,l_BS_abs,l_BS_jdos,l_BS_esort,& & BS_K_anti_resonant,l_BS_ares_from_res,BS_K_coupling use BS_solvers, ONLY:BSS_n_eig,Co_factor - use R_lattice, ONLY:bare_qpg + use R_lattice, ONLY:bare_qpg,q0_def_norm use units, ONLY:HA2EV use vec_operate, ONLY:degeneration_finder,sort use BS_solvers, ONLY:B_Hall,para_term_w0 - use X_m, ONLY:Resp_ij,X_magnons + use X_m, ONLY:Resp_ij,X_magnons,X_dichroism use PHOTOLUM, ONLY:PL,PL_prefactor - use BS, ONLY:l_BS_kerr,l_BS_magnons,l_BS_photolum,l_BS_dichroism + use BS, ONLY:l_BS_kerr,l_BS_kerr_asymm,l_BS_magnons,l_BS_photolum,l_BS_dichroism ! implicit none ! type(w_samp),intent(in) :: W integer, intent(in) :: iq,i_BS_mat complex(SP), intent(in) :: BS_E(BSS_n_eig) - complex(SP), pointer, intent(in) :: BS_R_left(:),BS_R_right(:) + complex(SP), pointer, intent(in) :: BS_R_left_abs(:),BS_R_right_abs(:) real(SP), pointer, intent(in) :: BS_E_SOC_corr(:,:) complex(SP), pointer, intent(in) :: BS_R_left_magn(:,:),BS_R_right_magn(:,:) - complex(SP), pointer, intent(in) :: BS_R_right_kerr(:) + complex(SP), pointer, intent(in) :: BS_R_left_kerr(:),BS_R_right_kerr(:),BS_R_right_dich(:,:) real(SP), pointer, intent(in) :: BS_R_PL(:,:) ! ! Work space @@ -48,7 +49,7 @@ subroutine K_diago_response_functions(iq,i_BS_mat,W,BS_E,BS_R_left,BS_R_right,BS real(SP), allocatable :: vtmpR(:) complex(SP), allocatable :: BS_E_sorted_tmp(:,:) integer, allocatable :: sort_indx(:),first_exc(:),n_of_exc(:) - complex(SP) :: g_fac,res_kerr,res_magn(2) + complex(SP) :: g_fac,res_kerr,res_dich,res_magn(2) logical :: l_ADD_the_ARES_using_the_RES ! n_SOC=1 @@ -85,10 +86,17 @@ subroutine K_diago_response_functions(iq,i_BS_mat,W,BS_E,BS_R_left,BS_R_right,BS if (.not.px%element_1D(i1)) cycle ! if (l_BS_abs) then - res_abs=BS_R_left(i1)*BS_R_right(i1)*ares_fac + res_abs=BS_R_left_abs(i1)*BS_R_right_abs(i1)*ares_fac endif if (l_BS_kerr) then - res_kerr=BS_R_left(i1)*BS_R_right_kerr(i1)*ares_fac + res_kerr=BS_R_left_abs(i1)*BS_R_right_kerr(i1)*ares_fac + if(l_BS_kerr_asymm) then + res_kerr=(BS_R_left_abs(i1)*BS_R_right_kerr(i1)-BS_R_left_kerr(i1)*BS_R_right_abs(i1))*ares_fac/2._SP + endif + endif + if(l_BS_dichroism) then + res_dich=(BS_R_left_abs(i1)*BS_R_right_dich(i1,1)+BS_R_left_kerr(i1)*BS_R_right_dich(i1,2)) + res_dich=res_dich*ares_fac/q0_def_norm/2._SP endif if(l_BS_magnons) res_magn(:)=BS_R_left_magn(:,i1)*BS_R_right_magn(:,i1)*ares_fac ! @@ -135,6 +143,18 @@ subroutine K_diago_response_functions(iq,i_BS_mat,W,BS_E,BS_R_left,BS_R_right,BS endif B_Hall(2)=B_Hall(2)+res_kerr*g_fac endif + ! + if(l_BS_dichroism) then + if(l_BS_esort.and.i_BS_mat==1) then + BS_E_sorted_tmp(i1,nVAR+1:nVAR+1)=res_dich + nVAR=nVAR+1 + endif + X_dichroism(:,2)=X_dichroism(:,2)-res_dich/(W%p(:)-EXC_E(i_pert_SOC)) + if (l_ADD_the_ARES_using_the_RES) then + X_dichroism(:,2)=X_dichroism(:,2)+conjg(res_dich)/(W%p(:)+conjg(EXC_E(i_pert_SOC))) + endif + endif + ! if(l_BS_magnons) then if(l_BS_esort.and.i_BS_mat==1) then BS_E_sorted_tmp(i1,nVAR+1:nVAR+n_spinor)=res_magn(:n_spinor) @@ -147,6 +167,7 @@ subroutine K_diago_response_functions(iq,i_BS_mat,W,BS_E,BS_R_left,BS_R_right,BS X_magnons(:,2,2)=X_magnons(:,2,2)+conjg(res_magn(1))/(W%p(:)+conjg(EXC_E(i_pert_SOC))) endif endif + ! if(l_BS_photolum) then PL(:,2)=PL(:,2)+PL_prefactor*ares_fac*BS_R_PL(1,i1)*aimag(-1._SP/(W%p(:)-EXC_E(i_pert_SOC)))/pi if (l_ADD_the_ARES_using_the_RES) then @@ -213,6 +234,10 @@ subroutine K_diago_response_functions(iq,i_BS_mat,W,BS_E,BS_R_left,BS_R_right,BS call PP_redux_wait(B_Hall(2)) endif ! + if(l_BS_dichroism) then + call PP_redux_wait(X_dichroism(:,2)) + endif + ! if(l_BS_magnons) then call PP_redux_wait(X_magnons(:,:,2)) endif @@ -233,6 +258,9 @@ subroutine K_diago_response_functions(iq,i_BS_mat,W,BS_E,BS_R_left,BS_R_right,BS if (BS_K_anti_resonant.and..not.BS_K_coupling) B_Hall(2)=B_Hall(2)-conjg(B_Hall(2)) endif ! + ! Factors to be fixed + if (l_BS_dichroism) X_dichroism(:,2)=X_dichroism(:,2)*(cI/SPEED_OF_LIGHT)*Co_factor/(4._SP*pi) + ! if (l_BS_magnons) X_magnons(:,:,2)=X_magnons(:,:,2)*Co_factor/(4._SP*pi) ! end subroutine K_diago_response_functions diff --git a/src/bse/K_diago_right_residuals.F b/src/bse/K_diago_right_residuals.F new file mode 100644 index 0000000000..5f97a86f71 --- /dev/null +++ b/src/bse/K_diago_right_residuals.F @@ -0,0 +1,79 @@ +! +! License-Identifier: GPL +! +! Copyright (C) 2022 The Yambo Team +! +! Authors (see AUTHORS file for details): DS +! +subroutine K_diago_right_residuals(mode,i_BS_mat,BS_E,BS_R_right,BS_V_right) + ! + ! BS_R_right_XX(i)= + ! =conjg(BSS_dipoles(j))*BS_V_right(j,i) + ! + use pars, ONLY:SP,cZERO,cI + use wrapper_omp, ONLY:V_dot_V_omp + use parallel_m, ONLY:PP_indexes,myid,PP_indexes_reset + use parallel_int, ONLY:PP_redux_wait,PARALLEL_index + use LIVE_t, ONLY:live_timing + use X_m, ONLY:global_gauge + use BS_solvers, ONLY:BSS_eh_E,BSS_eh_Z,BSS_eh_f,BSS_n_eig,BSS_dipoles_opt + use BS, ONLY:BS_K_dim,BS_H_dim,BS_K_coupling + use MAGNONS, ONLY:BSS_dipoles_magn + use DICHROISM, ONLY:BSS_dipoles_dich + ! + implicit none + ! + character(*),intent(in) :: mode + integer, intent(in) :: i_BS_mat + complex(SP), intent(in) :: BS_E(BSS_n_eig) + complex(SP), intent(out) :: BS_R_right(BSS_n_eig) + complex(SP), intent(in) :: BS_V_right(:,:) + ! + ! Workspace + ! + type(PP_indexes) ::px + integer ::i_lambda,n_res,res_range(2) + logical ::velocity_correction + ! + complex(SP),allocatable :: tmp_res(:) + ! + call PP_indexes_reset(px) + call PARALLEL_index(px,(/BSS_n_eig/)) + ! + BS_R_right=cZERO + velocity_correction=(index(mode,"opt")>0).and.(trim(global_gauge)=="velocity") + ! + if (.not.BS_K_coupling) res_range=(/1,BS_K_dim(i_BS_mat)/)+BS_K_dim(1)*(i_BS_mat-1) + if ( BS_K_coupling) res_range=(/1,BS_H_dim/) + ! + allocate(tmp_res(res_range(1):res_range(2))) + n_res=res_range(2)-res_range(1)+1 + ! + call live_timing(trim(mode)//' residuals ',px%n_of_elements(myid+1)) + ! + if(trim(mode)=="opt1") tmp_res=BSS_dipoles_opt(1,res_range(1):res_range(2)) + if(trim(mode)=="opt2") tmp_res=BSS_dipoles_opt(2,res_range(1):res_range(2)) + if(trim(mode)=="dic1") tmp_res=BSS_dipoles_dich(1,res_range(1):res_range(2)) + if(trim(mode)=="dic2") tmp_res=BSS_dipoles_dich(2,res_range(1):res_range(2)) + if(trim(mode)=="mag1") tmp_res=BSS_dipoles_magn(1,res_range(1):res_range(2)) + if(trim(mode)=="mag2") tmp_res=BSS_dipoles_magn(2,res_range(1):res_range(2)) + ! + tmp_res = conjg(tmp_res)*sqrt(cmplx(BSS_eh_f(res_range(1):res_range(2)),kind=SP)) + if (allocated(BSS_eh_Z)) tmp_res=tmp_res*sqrt(BSS_eh_Z(res_range(1):res_range(2))) + if (velocity_correction) tmp_res=tmp_res/BSS_eh_E(res_range(1):res_range(2)) + ! + do i_lambda=1,BSS_n_eig + if (.not.px%element_1D(i_lambda)) cycle + ! + BS_R_right(i_lambda)=V_dot_V_omp(n_res,tmp_res,BS_V_right(:,i_lambda)) + if (velocity_correction) BS_R_right(i_lambda)=BS_R_right(i_lambda)*BS_E(i_lambda) + ! + call live_timing(steps=1) + enddo + call PP_redux_wait(BS_R_right) + call live_timing + ! + call PP_indexes_reset(px) + deallocate(tmp_res) + ! +end subroutine K_diago_right_residuals diff --git a/src/bse/K_dipoles.F b/src/bse/K_dipoles.F index 6eebdb43fb..0e47c08552 100644 --- a/src/bse/K_dipoles.F +++ b/src/bse/K_dipoles.F @@ -13,13 +13,14 @@ subroutine K_dipoles(iq,Ken,Xk,q,X_oscillators,Dip) use vec_operate, ONLY:v_norm use R_lattice, ONLY:bz_samp,q0_def_norm,qindx_X use stderr, ONLY:intc + use com, ONLY:msg use wave_func, ONLY:WF use stderr, ONLY:STRING_match use interfaces, ONLY:WF_load,WF_free use electrons, ONLY:levels,spin use X_m, ONLY:X_t,global_gauge,i_G_shift use DIPOLES, ONLY:DIPOLE_t,DIP_alloc,DIP_rotated - use BS_solvers, ONLY:BSS_Vnl_included + use BS_solvers, ONLY:BSS_Vnl_included,BSS_Q_dir,BSS_E_dir,BSS_P_dir use BS, ONLY:BS_T_grp,BS_K_coupling,BS_nT_grps,BSE_L_kind,BS_bands,& & BS_dip_size,l_BS_trace,BS_n_eh_spaces,& & l_BS_abs,l_BS_kerr,l_BS_magnons,l_BS_dichroism,l_BS_photolum,& @@ -29,7 +30,7 @@ subroutine K_dipoles(iq,Ken,Xk,q,X_oscillators,Dip) & PAR_COM_Xk_ibz_INDEX use collision_el, ONLY:elemental_collision,elemental_collision_free,elemental_collision_alloc use timing_m, ONLY:timing - use vec_operate, ONLY:v_rotate + use vec_operate, ONLY:v_rotate,cross_product ! #include ! @@ -43,6 +44,7 @@ subroutine K_dipoles(iq,Ken,Xk,q,X_oscillators,Dip) ! ! Work space ! + logical ::geometry_ok integer ::ik_bz,ok_bz,ik,ok,isymm,osymm,iGo,iv,ic,i_sp_pol_c,i_sp_pol_v,i_T_g,i_T,& & io_err,i_res_ares,i_dip complex(SP) ::DIP_Splus,DIP_Smins,DIP_projected(BS_dip_size),DIP_expanded(3) @@ -78,18 +80,35 @@ subroutine K_dipoles(iq,Ken,Xk,q,X_oscillators,Dip) ! if (iq==1.and.i_G_shift==1) then ! - ! Directions for the external field + BSS_P_dir=cross_product(BSS_Q_dir,BSS_E_dir) + ! + if (trim(BSE_dipole_geometry)/='none'.and.l_BS_kerr) then + BSS_P_dir=0._SP + call v_rotate(trim(BSE_dipole_geometry),-pi/2._SP,BSS_E_dir,v_out=BSS_P_dir) + geometry_ok=abs(v_norm(BSS_P_dir)-1._SP)<1.E-5_SP .and. & + & abs(dot_product(BSS_E_dir,BSS_P_dir))<1.E-5_SP + if ( .not. geometry_ok) call error(" Field direction and BSEdips do not agree") + call msg("r"," BSEdips set in input. This imposes the propagation direction") + BSS_Q_dir=-cross_product(BSS_E_dir,BSS_P_dir) + endif ! - BS_field_direction(:,1)=Dip%q0/v_norm(Dip%q0) + geometry_ok=abs(dot_product(BSS_E_dir,BSS_Q_dir))<1.E-5_SP ! - if (l_BS_trace) then - BS_field_direction(:,1)=(/1._SP,0._SP,0._SP/) - BS_field_direction(:,2)=(/0._SP,1._SP,0._SP/) - BS_field_direction(:,3)=(/0._SP,0._SP,1._SP/) - else if (trim(BSE_dipole_geometry)/='none'.and.l_BS_kerr) then - call v_rotate(trim(BSE_dipole_geometry),-pi/2._SP,BS_field_direction(:,1),v_out=BS_field_direction(:,2)) + if ( l_BS_trace .and. .not. geometry_ok ) then + call warning(" Propagation direction not ortogonal to field polarization") + call warning(" Setting z for propagation and x-y for polarization") + BSS_E_dir=(/1._SP,0._SP,0._SP/) + BSS_P_dir=(/0._SP,1._SP,0._SP/) + BSS_Q_dir=(/0._SP,0._SP,1._SP/) + if (trim(BSE_dipole_geometry)/='none'.and.l_BS_kerr) BSE_dipole_geometry="xy" endif ! + ! Directions for the external field + ! + BS_field_direction(:,1)=BSS_E_dir ! external field + BS_field_direction(:,2)=BSS_P_dir ! induced polarization + BS_field_direction(:,3)=BSS_Q_dir ! field propagation direction + ! endif ! call live_timing('BSE dipoles',PAR_BS_nT_col_grps) diff --git a/src/bse/K_driver.F b/src/bse/K_driver.F index c9f84219b7..919c984d78 100644 --- a/src/bse/K_driver.F +++ b/src/bse/K_driver.F @@ -16,15 +16,17 @@ subroutine K_driver(Xen,Ken,Xk,q,X_static_pp,Xw,Dip) use DIPOLES, ONLY:DIPOLE_t use IO_int, ONLY:io_control use IO_m, ONLY:OP_RD_CL,REP - use BS_solvers, ONLY:BSS_n_freqs,BSS_er,BSS_dr,BSS_mode,BSS_damp_reference,BSS_q0,BSS_uses_DbGd + use BS_solvers, ONLY:BSS_n_freqs,BSS_er,BSS_dr,BSS_mode,BSS_damp_reference,BSS_q0,BSS_uses_DbGd,& +& BSS_Q_dir,BSS_E_dir use BS, ONLY:BS_bands,BS_q,BS_n_g_exch,BS_res_K_corr,BS_n_g_W,BS_cpl_K_corr,& -& BS_Blocks_and_Transitions_free,BSqpts +& BS_Blocks_and_Transitions_free,BSqpts,O_ng use TDDFT, ONLY:FXC_per_memstps,FXC_n_mem_freqs,FXC_is_retarded use drivers, ONLY:l_rpa_IP,l_bs_fxc,l_bss use parallel_m, ONLY:PAR_IND_WF_b_and_k,PAR_K_scheme,PARALLEL_default_mode use parallel_int, ONLY:PP_wait,PP_redux_wait,PARALLEL_global_indexes,PARALLEL_WF_distribute,PARALLEL_WF_index use interfaces, ONLY:eval_G_minus_G - use R_lattice, ONLY:bz_samp,qindx_B,qindx_X,qindx_B_max,qindx_free + use vec_operate, ONLY:v_norm + use R_lattice, ONLY:bz_samp,qindx_B,qindx_X,qindx_B_max,qindx_free,G_m_G_maxval,q0_def_norm ! #include ! @@ -37,7 +39,7 @@ subroutine K_driver(Xen,Ken,Xk,q,X_static_pp,Xw,Dip) ! Work Space ! type(X_t) :: X_tddft,X_oscillators - integer :: iq,Max_G_m_G,iG_BSE,iGo_BSE + integer :: iq,Max_G_m_G,iG_BSE,iGo_BSE,VB(2),CB(2) type(w_samp) :: W_bss ! ! I/O @@ -57,6 +59,12 @@ subroutine K_driver(Xen,Ken,Xk,q,X_static_pp,Xw,Dip) !========================= call K_driver_init("init",1,Ken,Xk) ! + ! q0 renormalization and field direction + ! + BSS_E_dir(:) = BSS_E_dir(:)/v_norm(BSS_E_dir) + BSS_Q_dir(:) = BSS_Q_dir(:)/v_norm(BSS_Q_dir) + BSS_q0(:) = BSS_E_dir(:)*q0_def_norm + ! ! Energy points !=============== call W_reset(W_bss) @@ -143,25 +151,14 @@ subroutine K_driver(Xen,Ken,Xk,q,X_static_pp,Xw,Dip) ! W/X RL size setup looking at the max index of G-G' ! ========= ! - iGo_BSE=qindx_B_max iG_BSE =BS_n_g_W - if (.not.iq==1) then - iGo_BSE=maxval((/4*qindx_B_max,qindx_X(iq,:,2)/)) - iG_BSE =max(BS_n_g_exch,BS_n_g_W) - !iGo_BSE=4*maxval((/4*qindx_B_max,qindx_X(iq,:,2)/)) - !iG_BSE =maxval((/BS_n_g_exch,BS_n_g_W,qindx_B_max/)) - endif + iGo_BSE=qindx_B_max + ! This needs be changed in case i_gW is redefined vi G_m_G + !if (.not.iq==1) iGo_BSE=maxval((/4*qindx_B_max,qindx_X(iq,:,2)/)) ! + ! This is the size of the first dimension of the G_m_G table Max_G_m_G=eval_G_minus_G(iG_BSE,iGo_BSE) - ! - if (.not.iq==1) then - if (BS_n_g_exch> Max_G_m_G ) then - call warning (' Exch size reduced from '//trim(intc(BS_n_g_exch))//' to '& -& //trim(intc(Max_G_m_G))//' due to symmetry constraints') - BS_n_g_exch= Max_G_m_G - call msg('s','[BSE-exch] Exch size forced by symmetry to be ',BS_n_g_exch) - endif - endif + O_ng=G_m_G_maxval ! if (BS_n_g_W> Max_G_m_G ) then call warning (' W size reduced from '//trim(intc(BS_n_g_W))//' to '& @@ -175,20 +172,20 @@ subroutine K_driver(Xen,Ken,Xk,q,X_static_pp,Xw,Dip) call DIPOLE_dimensions(Ken,Dip,BS_bands,BSS_q0) ! ! ... EH space (no parallelization) - call K_Transitions_setup("K",iq,Ken,Xk,Dip) + call K_Transitions_dimensions(iq,Ken,Xk,Dip,VB,CB) ! !... Parallel distribution call PARALLEL_global_Response_T_transitions(Xk) ! ! ... EH space (with parallelization) and Transition groups build-up - call K_Transitions_setup("T",iq,Ken,Xk,Dip) + call K_Transitions_setup(iq,Ken,Xk,Dip,VB,CB) ! !... Blocks build-up if (.not.l_rpa_IP) call K_blocks() ! ! Wave Functions distribution !============================= - call PARALLEL_WF_distribute(B_and_K_index=PAR_IND_WF_b_and_k,CLEAN_UP=.FALSE.) + call PARALLEL_WF_distribute(B_and_K_index=PAR_IND_WF_b_and_k,CLEAN_UP=iq==BS_q(1)) call PARALLEL_WF_index( ) ! ! 0. The Dipoles @@ -201,7 +198,7 @@ subroutine K_driver(Xen,Ken,Xk,q,X_static_pp,Xw,Dip) ! ! 2. The KERNEL ! ================ - if (.not.l_rpa_IP) call K(iq,Ken,Xk,q,X_static_pp,Xw,W_bss) + if (.not.l_rpa_IP) call K_kernel(iq,Ken,Xk,q,X_static_pp,Xw,W_bss) ! ! 3. The SOLVER ! ================= diff --git a/src/bse/K_driver_init.F b/src/bse/K_driver_init.F index a8d58a104b..f92637f164 100644 --- a/src/bse/K_driver_init.F +++ b/src/bse/K_driver_init.F @@ -8,7 +8,7 @@ subroutine K_driver_init(what,iq,Ken,Xk) ! use R_lattice, ONLY:bz_samp - use electrons, ONLY:levels,n_sp_pol,n_spinor + use electrons, ONLY:levels,n_sp_pol,n_spinor,n_spin use parser_m, ONLY:parser use parallel_m, ONLY:PARALLEL_default_mode use stderr, ONLY:STRING_match @@ -19,12 +19,15 @@ subroutine K_driver_init(what,iq,Ken,Xk) & BS_perturbative_SOC,l_BS_magnons,l_BS_photolum,& & BS_cpl_K_exchange,BS_n_g_exch,BS_res_K_exchange,BS_K_coupling,BS_res_ares_n_mat,& & BS_n_eh_spaces,l_BS_ares_from_res,BS_bands,BS_K_anti_resonant - use TDDFT, ONLY:FXC_is_retarded,FXC_n_g_corr + use TDDFT, ONLY:FXC_is_retarded,FXC_n_g_corr,l_Fxc_from_Vxc use drivers, ONLY:l_col_cut,l_rpa_IP,l_alda_fxc,l_bs_fxc use D_lattice, ONLY:i_time_rev,i_space_inv,l_3D #if defined _RT use drivers, ONLY:l_rt_carriers_in_use use RT_control, ONLY:NEQ_Kernel,EQ_Transitions,EQ_NoOcc,NEQ_Residuals,RT_BSE_Occ_Mode +#endif +#if defined _CUDA + use TDDFT, ONLY:FXC_mode #endif ! implicit none @@ -36,7 +39,7 @@ subroutine K_driver_init(what,iq,Ken,Xk) ! ! Work Space ! - logical :: l_flag,l_slepc_special + logical :: l_flag,l_slepc_special,l_Fxc_Libxc ! ! Defaults !========== @@ -143,7 +146,7 @@ subroutine K_driver_init(what,iq,Ken,Xk) if (iq/=1.and.i_time_rev==0.and.i_space_inv==0 ) l_BS_ares_from_res=.FALSE. if (l_BS_photolum) l_BS_ares_from_res=.FALSE. if (l_BS_magnons.and.n_sp_pol==2) l_BS_ares_from_res=.FALSE. - ! The next line is to switch of section of the codes due to the calculation + ! The next line is to switch off sections of the code due to the calculation ! of the ARES part without checking the BSE_mode (see io_BS_PAR_init for example) if (trim(BSE_mode)=="resonant") l_BS_ares_from_res=.TRUE. ! @@ -162,6 +165,16 @@ subroutine K_driver_init(what,iq,Ken,Xk) & call error('set Lkind = DEFAULT or BAR or FULL') ! BS_K_is_ALDA=l_alda_fxc + if (l_alda_fxc) then + call parser('FxcLibxc',l_Fxc_Libxc) + l_Fxc_from_Vxc=(n_spin==1).and..not.l_Fxc_Libxc + endif +#if defined _CUDA + if ( BS_K_is_alda.and. index(FXC_mode,"G-")>0 ) then + call warning(" Tddft with G-integrals is not GPU ported. Fallback to R-integrals") + FXC_mode="R-def" + endif +#endif BS_dip_size=1 ! ! Memory saving diff --git a/src/bse/K_exchange_collisions.F b/src/bse/K_exchange_collisions.F index 3933c53ba8..c24cc5c5dd 100644 --- a/src/bse/K_exchange_collisions.F +++ b/src/bse/K_exchange_collisions.F @@ -5,12 +5,14 @@ ! ! Authors (see AUTHORS file for details): AM ! -subroutine K_exchange_collisions(iq,Xk,i_T_grp) +subroutine K_exchange_collisions(iq,Xk,i_T_grp,NG,l_bs_exch_wf_in_loop) ! use pars, ONLY:SP,cZERO use D_lattice, ONLY:nsym,i_time_rev,sop_inv,sop_inv_d - use BS, ONLY:BS_T_grp,K_EXCH_collision,BS_oscillators_alloc,BS_n_g_exch,& -& BSE_L_kind,l_BS_magnons + use BS, ONLY:BS_T_grp,K_EXCH_collision,BS_n_g_exch,& +& BSE_L_kind,l_BS_magnons,l_BSE_minimize_memory,BS_bands + use interfaces, ONLY:WF_load,WF_free + use wave_func, ONLY:WF use R_lattice, ONLY:g_rot,g_rot_d,qindx_X,bz_samp use stderr, ONLY:STRING_match use electrons, ONLY:n_sp_pol @@ -24,34 +26,30 @@ subroutine K_exchange_collisions(iq,Xk,i_T_grp) implicit none ! type(bz_samp),intent(in) :: Xk - integer, intent(in) :: iq,i_T_grp + integer, intent(in) :: iq,i_T_grp,NG(2) + logical, intent(in) :: l_bs_exch_wf_in_loop ! ! Work Space ! + logical :: l_load_WFs + integer :: NK(2) integer :: i_T_el,i_T_el_p,N_T_el_p,i_c,i_v,i_sp_c,i_sp_v,i_k_bz,i_k,i_s,i_g0,i_g1,i_g2,i_p_bz,i_g_p,i_p,& & i_sp,i_T_el_last_with_identity_sym,i_T_grp_last_with_identity_sym,i_T_grp_p #ifdef _CUDA complex(SP), pointer, device :: O_x_d(:,:),O_x_sym_d(:,:) #endif - ! - ! Oscillators already done ? - ! - if (allocated( BS_T_grp(i_T_grp)%O_x )) return - ! - if (l_BS_magnons.and.n_sp_pol==2) return ! call timing('T_space EXX Osc.',OPR='start') ! ! Allocate ! call elemental_collision_free(K_EXCH_collision) - call BS_oscillators_alloc(.TRUE.,i_T_grp,.FALSE.,0) ! #if defined(__NOTNOW) && ! defined(_CUDA) !$omp parallel default(shared), private( K_EXCH_collision, & !$omp & i_T_el,i_k_bz,i_k,i_s, i_p_bz,i_p,i_sp, i_v,i_c,i_sp_c,i_sp_v, & !$omp & i_T_grp_last_with_identity_sym, i_T_el_last_with_identity_sym, & - !$omp & i_T_grp_p, N_T_el_p, i_T_el_p, i_g1, i_g2) + !$omp & i_T_grp_p, N_T_el_p, i_T_el_p, i_g1, i_g2, NK) #endif ! call OPENMP_update(master_thread) @@ -61,6 +59,7 @@ subroutine K_exchange_collisions(iq,Xk,i_T_grp) #if defined(__NOTNOW) && ! defined(_CUDA) !$omp do schedule(dynamic) #endif + NK=-1 T_loop: do i_T_el=1,BS_T_grp(i_T_grp)%size ! i_k_bz=BS_T_grp(i_T_grp)%table(i_T_el,1) @@ -82,6 +81,20 @@ subroutine K_exchange_collisions(iq,Xk,i_T_grp) K_EXCH_collision%os=(/i_v,i_p,i_sp,i_sp_v/) K_EXCH_collision%qs=(/i_g_p,iq,1/) ! + l_load_WFs= l_bs_exch_wf_in_loop .and. (NK(1)/=min(i_k,i_p).or.NK(2)/=max(i_k,i_p)) + if (l_load_WFs) then +#if defined(__NOTNOW) && ! defined(_CUDA) + !$omp critical +#endif + if (NK(2)/=-1) call WF_free(WF,keep_fft=.true.,keep_states_to_load=.true.) + NK=(/min(i_k,i_p),max(i_k,i_p)/) + call WF_load(WF,NG(1),NG(2),BS_bands,NK,k_extrema_only=.true.,quiet=.true.,& + & space='R',title="Kernel exch",keep_states_to_load=.true.) +#if defined(__NOTNOW) && ! defined(_CUDA) + !$omp end critical +#endif + endif + ! if (iq==1) then ! ! G==0 term @@ -173,6 +186,8 @@ subroutine K_exchange_collisions(iq,Xk,i_T_grp) #if defined(__NOTNOW) && ! defined(_CUDA) !$omp end parallel #endif + ! + if(l_bs_exch_wf_in_loop) call WF_free(WF,keep_fft=.true.,keep_states_to_load=.true.) ! call timing('T_space EXX Osc.',OPR='stop') ! diff --git a/src/bse/K.F b/src/bse/K_kernel.F similarity index 69% rename from src/bse/K.F rename to src/bse/K_kernel.F index 578df49b0a..09e082c441 100644 --- a/src/bse/K.F +++ b/src/bse/K_kernel.F @@ -5,47 +5,47 @@ ! ! Authors (see AUTHORS file for details): AM DS AF ! -subroutine K(iq,Ken,Xk,q,X,Xw,W_bss) +subroutine K_kernel(iq,Ken,Xk,q,X,Xw,W_bss) ! - ! K = <2V-W> for n_spin=1 + ! K = <2V-W> for n_spin=1; K= for n_spin>1 ! use pars, ONLY:SP,schlen,pi,cZERO,cI - use stderr, ONLY:intc use LOGO, ONLY:pickup_a_random use drivers, ONLY:l_bs_fxc,l_tddft,l_rim_w use frequency, ONLY:w_samp - use electrons, ONLY:levels,spin_occ,spin,n_spin,n_spinor + use electrons, ONLY:levels,spin_occ,spin,n_spin,n_spinor,n_sp_pol use FFT_m, ONLY:fft_size use interfaces, ONLY:WF_load,WF_free,eval_G_minus_G,TDDFT_ALDA_eh_space_R_kernel use D_lattice, ONLY:nsym,DL_vol,i_time_rev,i_space_inv,sop_inv,sop_tab - use R_lattice, ONLY:G_m_G,qindx_B,bz_samp,RIM_W,& -& qindx_X,qindx_B_max,qindx_B_load,& + use R_lattice, ONLY:qindx_X,bz_samp,& +& qindx_B,qindx_B_max,qindx_B_load,RIM_W,& & RIM_ng,RIM_W_is_diagonal,RIM_W_ng use com, ONLY:msg use stderr, ONLY:intc use LIVE_t, ONLY:live_timing use X_m, ONLY:X_t + use interfaces, ONLY:el_density_and_current use QP_m, ONLY:QP_ng_Vxc use BS, ONLY:BS_bands,BS_res_K_corr,BS_W,BS_W_is_diagonal,& -& BS_res_K_exchange,BS_Block_size,& +& BS_res_K_exchange,BS_Block_size,BS_n_g_W,& & O_ng,BS_n_g_exch,BS_n_g_fxc,BS_identifier,BS_LiveTiming_steps,& & BS_K_dim,BS_K_is_ALDA,BS_cpl_K_exchange,& & BS_cpl_K_corr,K_EXCH_collision,K_CORR_collision,& -& BS_oscillators_free,WF_phase,n_BS_blks,BS_blk,BS_T_grp,& +& WF_phase,n_BS_blks,BS_blk,BS_T_grp,& & BS_nT_grps,BS_blks_free,l_BS_ares_from_res,& & l_BSE_minimize_memory,l_BSE_restart,l_BSE_kernel_complete,& -& BS_perturbative_SOC,BS_K_cutoff,BS_max_val +& BS_perturbative_SOC,BS_K_cutoff,BS_max_val,l_BS_magnons use collision_el, ONLY:elemental_collision_free use IO_int, ONLY:io_control use IO_m, ONLY:REP,OP_WR,RD,WR_CL,OP_APP,deliver_IO_error_message,WR use TDDFT, ONLY:FXC_K_diagonal,F_xc_gspace,FXC_n_g_corr,io_BS_Fxc, & -& FXC_mode,tddft_wf_t +& FXC_mode,tddft_wf_t,l_Fxc_from_Vxc use xc_functionals,ONLY:V_xc,F_xc,F_xc_mat,magn,XC_potential_driver use global_XC, ONLY:WF_xc_functional,WF_kind,WF_exx_fraction use wave_func, ONLY:WF use openmp, ONLY:OPENMP_update,master_thread use timing_m, ONLY:timing - use parallel_m, ONLY:myid,ncpu,PAR_K_scheme + use parallel_m, ONLY:myid,ncpu,PAR_K_scheme,PARALLEL_default_mode,master_cpu use parallel_int, ONLY:PP_redux_wait use MAGNONS, ONLY:BSS_MAGN_free use PHOTOLUM, ONLY:BSS_PL_free @@ -78,20 +78,19 @@ subroutine K(iq,Ken,Xk,q,X,Xw,W_bss) integer :: i_k_bz_last,i_p_bz_last,i_pmq_bz_last,i_kmq_bz_last, & & iq_W_bz_last,ig_W_last,iq_W_bz_mq_last,ig_W_mq_last ! - integer :: i_block,O_ng_shift,NG,& + integer :: i_block,O_ng_shift,O_ng_exch,O_ng_tddft,iHxc,NG(2),NK(2),& & ig_kmq,ig_pmq,ig_W_mq,iq_W_bz_mq,iq_W_mq,iq_W_s_mq,& & bands_to_load(2),is_k(4),os_k(4),is_p(4),os_p(4) - complex(SP):: Co,H_x,H_c + complex(SP):: Co,H_x,H_c,BS_mat_tmp complex(SP):: sqrt_f_itk,sqrt_f_itp - real(SP), allocatable :: BS_max_val_CPU(:) + real(SP), allocatable :: BS_max_val_CPU(:),rho(:) ! complex(SP), external ::K_correlation_kernel_std,K_correlation_kernel_dir complex(SP), external ::TDDFT_ALDA_eh_space_G_kernel complex(SP), external ::K_exchange_kernel_resonant,K_exchange_kernel_coupling ! - ! Oscillators - ! - logical :: load_O_X,l_std_alg,l_dir_alg,l_tddft_gsum,l_tddft_rsum + logical :: l_bs_exch,l_bs_corr,l_bs_exch_wf_in_loop,l_bs_tddft_wf_in_loop,l_matrix_init,& + & l_load_kernel,l_write_kernel,l_skip_phases,l_std_alg,l_dir_alg,l_tddft_gsum,l_tddft_rsum ! ! I/O ! @@ -149,51 +148,57 @@ subroutine K(iq,Ken,Xk,q,X,Xw,W_bss) ! call section('=',trim(section_title)) ! - ! Exchange - !========== - ! - load_O_X=BS_res_K_exchange.or.l_bs_fxc + l_bs_exch=(any((/BS_res_K_exchange,BS_cpl_K_exchange/)).or.l_bs_fxc).and..not.(l_BS_magnons.and.n_sp_pol==2) + l_bs_corr=(any((/BS_res_K_corr,BS_cpl_K_corr/)) .and. .not.l_bs_fxc) ! - if (load_O_X) call msg('r','[BSE] Exchange components ',BS_n_g_exch) + ! Oscillators Setup + !=================== ! - if (iq/=1 .and. n_spinor==2 .and. nsym>1) then - ! Default is direct algorithm, unless imposed from input - call parser('ImposeStdAlg',l_std_alg) - l_dir_alg=.not.l_std_alg - else - ! Default is standard algorithm, unless imposed from input - call parser('ImposeDirAlg',l_dir_alg) - l_std_alg=.not.l_dir_alg + if (l_bs_exch) then + O_ng_exch =maxval(qindx_X(iq,:,2)) + ! I load the WFs for exachange in a loop in two cases + ! - q=0 and more than one k-point. + ! In this case it is the default, unless I specify workload in the parallel mode + ! - q/=0 + ! In this case it is activated only if I specify KQmemory in the parallel mode + ! The code would work fine also in this case, however it would require continuous loading and unloading of WFs. + ! To avoid this one would need to move the loop over i_star in K_Transitions_setup outside the + ! loop over ic and iv (see related comment in K_Transiontion_setup.F) + l_bs_exch_wf_in_loop= ((iq==1) .and. (Xk%nbz>=1).and..not.trim(PARALLEL_default_mode)=="workload") .or. & + & ((iq> 1) .and. trim(PARALLEL_default_mode)=="KQmemory") + call msg('r','[BSE] Exchange components ',BS_n_g_exch) endif ! - l_tddft_gsum=.false. - l_tddft_rsum=.false. if (BS_K_is_ALDA) then + O_ng_tddft =maxval(qindx_X(iq,:,2)) l_tddft_gsum=(index(FXC_mode,"G-")>0) l_tddft_rsum=(index(FXC_mode,"R-")>0) if( .not.(l_tddft_gsum.or.l_tddft_rsum) ) call error("Wrong FXC_mode:"//trim(FXC_mode)) + if(index(FXC_mode,"cut_GmGp")>0) then + FXC_n_g_corr=BS_n_g_fxc + if (BS_n_g_fxc/= eval_G_minus_G(BS_n_g_fxc,0) ) then + BS_n_g_fxc = eval_G_minus_G(BS_n_g_fxc,0) + call msg('s','[TDDFT] Fxc cutoff is forced by symmetry to be ',BS_n_g_fxc) + endif + endif + l_bs_tddft_wf_in_loop= ((iq==1) .and. (Xk%nbz>=1).and.(.not.trim(PARALLEL_default_mode)=="workload") ) .or. & + & ((iq> 1) .and. trim(PARALLEL_default_mode)=="KQmemory" ) + call msg('r','[BSE] ALDA components ',BS_n_g_fxc) endif ! - ! Oscillators Setup - !=================== - O_ng=1 - O_ng_shift=maxval(qindx_X(iq,:,2)) - ! - if (any((/BS_res_K_corr,BS_cpl_K_corr/)).and..not.l_bs_fxc) then - O_ng=maxval(G_m_G) - call fft_check_size(1,O_ng,BS_n_g_exch,"Kx") - ! Hybrid functional. - ! The cutoff on the non-hybrid part, BS_n_g_fxc, need to be lowered - if (BS_K_is_ALDA) call fft_check_size(1,O_ng,BS_n_g_fxc,"Kx") - if(iq/=1) O_ng_shift=max(qindx_B_max,O_ng_shift) - endif - ! - if(BS_K_is_ALDA.and.index(FXC_mode,"cut_GmGp")>0) then - FXC_n_g_corr=BS_n_g_fxc - if (BS_n_g_fxc/= eval_G_minus_G(BS_n_g_fxc,0) ) then - BS_n_g_fxc = eval_G_minus_G(BS_n_g_fxc,0) - call msg('s','[TDDFT] ALDA Fxc cutoff is forced by symmetry to be ',BS_n_g_fxc) + if (l_bs_corr) then + O_ng_shift=qindx_B_max + if (iq/=1 .and. n_spinor==2 .and. nsym>1) then + ! Default is direct algorithm, unless imposed from input + call parser('ImposeStdAlg',l_std_alg) + l_dir_alg=.not.l_std_alg + else + ! Default is standard algorithm, unless imposed from input + call parser('ImposeDirAlg',l_dir_alg) + l_std_alg=.not.l_dir_alg endif + call parser('SkipPhases',l_skip_phases) + call msg('r','[BSE] Correlation components ',(/BS_n_g_W,O_ng/)) endif ! call K_restart(iq,X,ID_head,ID,ID_compr,l_partial_kernel_loaded) @@ -201,7 +206,7 @@ subroutine K(iq,Ken,Xk,q,X,Xw,W_bss) if( l_BSE_kernel_complete ) return ! io_QINDX_err=0 - if (any((/BS_res_K_corr,BS_cpl_K_corr/)).and..not.l_bs_fxc) io_QINDX_err=qindx_B_init(qindx_ID,qindx_ID_frag) + if (l_bs_corr) io_QINDX_err=qindx_B_init(qindx_ID,qindx_ID_frag) if(io_QINDX_err/=0) call error('Error reading qindx_B database ') ! ! Screened interaction @@ -213,7 +218,7 @@ subroutine K(iq,Ken,Xk,q,X,Xw,W_bss) if (l_rim_w) call QP_interpolate_W(X,Xw,q,'BSE') ! io_X_err=0 - if (BS_res_K_corr) call K_screened_interaction(X,Xw,q,io_X_err) + if (l_bs_corr) call K_screened_interaction(X,Xw,q,io_X_err) ! if (io_X_err<0) then call deliver_IO_error_message(io_X_err,'PP/Em1s') @@ -249,30 +254,66 @@ subroutine K(iq,Ken,Xk,q,X,Xw,W_bss) ! endif ! - ! Wave Functions - !================ - bands_to_load=BS_bands - if (BS_K_is_ALDA) bands_to_load=(/1,BS_bands(2)/) + ! DB identifier + !=============== + BS_identifier=pickup_a_random(10000._SP) + ! + ! BS DB description section I/O + !=============================== ! - section_title='-BSK' - if (l_bs_fxc) section_title='-BSK->Fxc' + call section('=','Main kernel loop') ! - NG=max(O_ng,BS_n_g_exch) - if (BS_K_is_ALDA) NG=max(NG,BS_n_g_fxc) + if(BS_K_cutoff>0._SP) then + YAMBO_ALLOC(BS_max_val_CPU,(ncpu)) + BS_max_val_CPU=0._SP + endif + ! +#if defined _PAR_IO + ! + call io_control(ACTION=OP_APP,COM=REP,ID=ID_head) + io_BS_err=io_BS_header(iq,X,ID_head,"full") + ! + call io_control(ACTION=OP_APP,COM=REP,ID=ID,DO_IT=.TRUE.) + io_BS_err=io_BS_PAR_init(iq,ID,"full") + ! +#else + call io_control(ACTION=OP_WR,COM=REP,SEC=(/1/),ID=ID) + io_BS_err=io_BS(iq,X,ID) +#endif + ! + ! This loop is on the exchange and correlation of the BSE + ! iHxc=1 --> K_H=v + ! iHxc=2 --> K_TDDFT=fxc + ! iHxc=3 --> K_c=W + ! + l_matrix_init=.true. + l_load_kernel=.true. + l_write_kernel=.false. + ! + BS_blk_done=.false. + ! + do iHxc=1,3 + ! + if ((.not.l_bs_exch).and.iHxc==1) cycle ! - call WF_load(WF,NG,O_ng_shift,bands_to_load,(/1,Xk%nibz/),space='R',title=trim(section_title)) + if ((.not.BS_K_is_ALDA).and.iHxc==2) cycle ! - ! Wave Functions Phases - !======================= - if(l_std_alg) call K_WF_phases(Xk) - ! - ! Spatial Inversion Test - !======================== - call WF_spatial_inversion(Ken,Xk) + if ((.not.l_bs_corr).and.iHxc==3) cycle + ! + section_title='-BSK-'//trim(intc(iHxc)) + if (l_bs_fxc) section_title='-BSK->Fxc-'//trim(intc(iHxc)) + ! + if (iHxc==1) NG=(/BS_n_g_exch,O_ng_exch/) + if (iHxc==2) NG=(/BS_n_g_fxc ,O_ng_tddft/) + if (iHxc==3) NG=(/O_ng ,O_ng_shift/) ! ! ALDA !====== - if (BS_K_is_ALDA) then + if (iHxc==2) then + ! + call WF_load(WF,NG(1),NG(2),(/1,Ken%nbm/),(/1,Xk%nibz/),space='R',title="-TDDFT",keep_states_to_load=.true.) + ! + QP_ng_Vxc=BS_n_g_fxc ! if (l_tddft_rsum) then YAMBO_ALLOC(tddft_wf%rhotwr1,(fft_size*n_spinor*n_spinor)) @@ -280,13 +321,16 @@ subroutine K(iq,Ken,Xk,q,X,Xw,W_bss) else if (l_tddft_gsum) then YAMBO_ALLOC(tddft_wf%rhotwr_DP,(fft_size)) endif + ! YAMBO_ALLOC(tddft_wf%WF_symm1,(fft_size,n_spinor)) YAMBO_ALLOC(tddft_wf%WF_symm2,(fft_size,n_spinor)) ! YAMBO_ALLOC(F_xc,(fft_size,n_spin,n_spin)) - YAMBO_ALLOC(V_xc,(fft_size,n_spin)) - V_xc=0._SP - QP_ng_Vxc=BS_n_g_fxc + if(l_BS_magnons .or. n_spinor==2 .or. l_Fxc_from_Vxc) then + YAMBO_ALLOC(V_xc,(fft_size,n_spin)) + V_xc=0._SP + endif + ! if(n_spin>1) then YAMBO_ALLOC(magn,(fft_size,3)) endif @@ -294,40 +338,18 @@ subroutine K(iq,Ken,Xk,q,X,Xw,W_bss) YAMBO_ALLOC(F_xc_mat,(fft_size,n_spin,n_spin,n_spin,n_spin)) call Build_F_xc_mat(V_xc,F_xc,F_xc_mat) YAMBO_FREE(F_xc) - YAMBO_FREE(V_xc) + if(l_BS_magnons .or. n_spinor==2 .or. l_Fxc_from_Vxc) then + YAMBO_FREE(V_xc) + endif if(n_spin>1) then YAMBO_FREE(magn) endif + ! call TDDFT_ALDA_lower_cutoff(FXC_mode) ! + call WF_free(WF,keep_states_to_load=.true.) + ! endif - ! - ! DB identifier - !=============== - BS_identifier=pickup_a_random(10000._SP) - ! - ! BS DB description section I/O - !=============================== - ! - call section('=','Main kernel loop') - ! - if(BS_K_cutoff>0._SP) then - YAMBO_ALLOC(BS_max_val_CPU,(ncpu)) - BS_max_val_CPU=0._SP - endif - ! -#if defined _PAR_IO - ! - call io_control(ACTION=OP_APP,COM=REP,ID=ID_head,DO_IT=.TRUE.) - io_BS_err=io_BS_header(iq,X,ID_head,"full") - ! - call io_control(ACTION=OP_APP,COM=REP,ID=ID,DO_IT=.TRUE.) - io_BS_err=io_BS_PAR_init(iq,ID,"full") - ! -#else - call io_control(ACTION=OP_WR,COM=REP,SEC=(/1/),ID=ID) - io_BS_err=io_BS(iq,X,ID) -#endif ! ! Timing !======== @@ -340,10 +362,8 @@ subroutine K(iq,Ken,Xk,q,X,Xw,W_bss) ! First load previously computed matrix elements. !================================================= ! - BS_blk_done=.false. - ! #ifdef _PAR_IO - if (l_BSE_restart) then + if (l_BSE_restart.and.l_load_kernel) then ! if (m_steps>0) call live_timing("Loading partial kernel",m_steps) ! @@ -380,16 +400,42 @@ subroutine K(iq,Ken,Xk,q,X,Xw,W_bss) ! enddo ! - call live_timing() + if (m_steps>0) call live_timing() ! call msg("s","Kernel loaded percentual ",real(n_steps-m_steps,SP)/real(n_steps,SP)*100._SP,'[%]') ! + l_load_kernel=.false. + ! endif #endif + ! + if ( (iHxc==1 .and. l_bs_exch_wf_in_loop) .or. (iHxc==2.and.l_bs_tddft_wf_in_loop.and.l_tddft_gsum)) then + ! DS this is used only for iq==1 + call fft_setup(NG(1),NG(2),.true.) + else + ! + call WF_load(WF,NG(1),NG(2),BS_bands,(/1,Xk%nibz/),space='R',& + & title=trim(section_title),keep_states_to_load=.true.) + ! + ! Wave Functions Phases + !======================= + if (iHxc==3.and.l_std_alg) then + call K_WF_phases(Xk) + if (l_skip_phases) then + WF_phase=-99._SP + call msg('r','[BSE] WF phases ignored as imposed in input') + endif + endif + ! + endif ! if (.not.l_bs_fxc) section_title='Kernel' if ( l_bs_fxc) section_title='Kernel->Fxc' ! + if (iHxc==1) section_title=trim(section_title)//' exch' + if (iHxc==2) section_title=trim(section_title)//' tddft' + if (iHxc==3) section_title=trim(section_title)//' corr' + ! if (m_steps>0) call live_timing(trim(section_title),m_steps) ! !------------------ @@ -398,10 +444,17 @@ subroutine K(iq,Ken,Xk,q,X,Xw,W_bss) ! block_loop:& do i_block=1,n_BS_blks + ! + mode_now=BS_blk(i_block)%mode + if (.not.l_BS_ares_from_res) mode_now="F" + ! + if (iHxc==1) l_write_kernel=.not.(BS_K_is_ALDA.or.l_bs_corr) + if (iHxc==2) l_write_kernel=.not. l_bs_corr + if (iHxc==3) l_write_kernel=.true. ! if (BS_blk_done(i_block)) then #if defined _PAR_IO - if(l_partial_kernel_loaded) then + if(l_partial_kernel_loaded.and.l_write_kernel) then call io_control(ACTION=WR,ID=ID) call io_BS_PAR_block(iq,i_block,ID,"full") endif @@ -414,6 +467,13 @@ subroutine K(iq,Ken,Xk,q,X,Xw,W_bss) if (BS_LiveTiming_steps>= 0) i_steps=BS_Block_size(i_block) if (BS_LiveTiming_steps==-1) i_steps=1 ! + ! Nothing to do, just write the kernel to disk + if (trim(BS_blk(i_block)%mode)=="C" .and. iHxc==3 .and. .not.BS_cpl_K_corr) then + BS_blk(i_block)%done(:,:)="t" + if(BS_K_cutoff>0._SP) BS_max_val_CPU(myid+1)=maxval( (/BS_max_val_CPU(myid+1),abs(BS_blk(i_block)%mat(:,:))/) ) + goto 100 + endif + ! i_Tgrp_k =BS_blk(i_block)%iT_k i_Tgrp_p =BS_blk(i_block)%iT_p i_Tgrp_k_st=BS_blk(i_block)%iT_k_st @@ -421,33 +481,40 @@ subroutine K(iq,Ken,Xk,q,X,Xw,W_bss) i_k =BS_blk(i_block)%ik i_p =BS_blk(i_block)%ip ! - mode_now=BS_blk(i_block)%mode - if (.not.l_BS_ares_from_res) mode_now="F" - ! ! Exchange oscillators !---------------------- - ! - call K_exchange_collisions(iq,Xk,i_Tgrp_k) - ! - if (i_Tgrp_k/=i_Tgrp_p) call K_exchange_collisions(iq,Xk,i_Tgrp_p) + if(iHxc==1) then + if (.not.allocated( BS_T_grp(i_Tgrp_k)%O_x) ) then + call BS_exchange_oscillators_alloc(i_Tgrp_k) + call K_exchange_collisions(iq,Xk,i_Tgrp_k,NG,l_bs_exch_wf_in_loop) + endif + if (.not.allocated( BS_T_grp(i_Tgrp_p)%O_x) ) then + call BS_exchange_oscillators_alloc(i_Tgrp_p) + call K_exchange_collisions(iq,Xk,i_Tgrp_p,NG,l_bs_exch_wf_in_loop) + endif + endif ! ! TDDFT oscillators !---------------------- - if(BS_K_is_ALDA.and.l_tddft_gsum) then + if(iHxc==2 .and. l_tddft_gsum) then if (.not.allocated( BS_T_grp(i_Tgrp_k)%O_tddft_L) ) then call TDDFT_oscillators_alloc_L(i_Tgrp_k) - call TDDFT_ALDA_eh_space_G_collisions_L(iq,Xk,i_Tgrp_k,tddft_wf,mode_now) + call TDDFT_ALDA_eh_space_G_collisions_L(iq,Xk,i_Tgrp_k,NG,l_bs_tddft_wf_in_loop,tddft_wf,mode_now) endif if (.not.allocated( BS_T_grp(i_Tgrp_p)%O_tddft_R) ) then call TDDFT_oscillators_alloc_R(i_Tgrp_p) - call TDDFT_ALDA_eh_space_G_collisions_R(iq,Xk,i_Tgrp_p,tddft_wf,mode_now) + call TDDFT_ALDA_eh_space_G_collisions_R(iq,Xk,i_Tgrp_p,NG,l_bs_tddft_wf_in_loop,tddft_wf,mode_now) endif endif ! ! Correlation oscillators !---------------------- - if (l_std_alg) call K_correlation_collisions_std(iq,i_block,qindx_ID_frag,Xk,q) - if (l_dir_alg) call K_correlation_collisions_dir(iq,i_block,qindx_ID_frag,Xk,q) + if(iHxc==3) then + if (l_std_alg) call K_correlation_collisions_std(iq,i_block,qindx_ID_frag,Xk,q) + if (l_dir_alg) call K_correlation_collisions_dir(iq,i_block,qindx_ID_frag,Xk,q) + endif + ! + call timing('X T_space Kernel sum',OPR='start') ! ! Transition Loops !------------------ @@ -458,11 +525,11 @@ subroutine K(iq,Ken,Xk,q,X,Xw,W_bss) !$omp & sqrt_f_itk,i_k_bz,i_v_k,i_c_k,i_k_sp_pol_c,i_k_sp_pol_v,i_k_s,is_k,os_k,i_k_s_m1,H_pos, & !$omp & sqrt_f_itp,i_p_bz,i_v_p,i_c_p,i_p_sp_pol_c,i_p_sp_pol_v,i_p_s,is_p,os_p,i_kp_s, & !$omp & i_kmq_bz,ig_kmq,i_kmq,i_kmq_s,i_kmq_t,i_kmq_s_m1,i_kp_mq_s, & - !$omp & i_pmq_bz,ig_pmq,i_pmq,i_pmq_s,i_pmq_t, & + !$omp & i_pmq_bz,ig_pmq,i_pmq,i_pmq_s,i_pmq_t,ig_W_final, & !$omp & ig_W,iq_W,iq_W_s,iq_W_bz,iq_W_bz_mq,ig_W_mq,iq_W_mq,iq_W_s_mq,H_x,H_c, & !$omp & i_p_bz_last,i_k_bz_last,i_pmq_bz_last,i_kmq_bz_last, & !$omp & iq_W_bz_last,ig_W_last,iq_W_bz_mq_last,ig_W_mq_last, & - !$omp & i_k_bz_mem,i_kmq_bz_mem,qindx_tmp,tddft_wf ) + !$omp & i_k_bz_mem,i_kmq_bz_mem,qindx_tmp,BS_mat_tmp,tddft_wf ) #endif ! call OPENMP_update(master_thread) @@ -483,8 +550,9 @@ subroutine K(iq,Ken,Xk,q,X,Xw,W_bss) H_pos(1) = sum(BS_T_grp(I_Tgrp_k_st:I_Tgrp_k-1)%size)+i_Tk+& & (BS_T_grp(I_Tgrp_k)%i_res_ares-1)*BS_K_dim(1) H_pos(2) = sum(BS_T_grp(I_Tgrp_p_st:I_Tgrp_p-1)%size)+i_Tp+& - & (BS_T_grp(I_Tgrp_p)%i_res_ares-1)*BS_K_dim(1) - if (H_pos(1)>H_pos(2)) cycle + & (BS_T_grp(I_Tgrp_p)%i_res_ares-1)*BS_K_dim(1) + ! + if (H_pos(1)>H_pos(2)) cycle ! i_k_bz = BS_T_grp(i_Tgrp_k)%table(i_Tk,1) i_p_bz = BS_T_grp(i_Tgrp_p)%table(i_Tp,1) @@ -498,16 +566,13 @@ subroutine K(iq,Ken,Xk,q,X,Xw,W_bss) i_kmq_bz=qindx_X(iq,i_k_bz,1) i_pmq_bz=qindx_X(iq,i_p_bz,1) ! - ig_kmq = qindx_X(iq,i_k_bz,2) - ig_pmq = qindx_X(iq,i_p_bz,2) - ! i_kmq =Xk%sstar(i_kmq_bz,1) i_pmq =Xk%sstar(i_pmq_bz,1) ! i_kmq_s =Xk%sstar(i_kmq_bz,2) i_pmq_s =Xk%sstar(i_pmq_bz,2) ! - if (BS_res_K_corr) then + if (iHxc==3) then i_kmq_t=BS_blk(i_block)%kp_table(1,i_kmq) i_pmq_t=BS_blk(i_block)%kp_table(2,i_pmq) endif @@ -515,7 +580,7 @@ subroutine K(iq,Ken,Xk,q,X,Xw,W_bss) i_kmq_s_m1 = sop_inv(i_kmq_s) i_kp_mq_s = sop_tab(i_kmq_s_m1,i_pmq_s) ! - if ((BS_res_K_corr.or.BS_cpl_K_corr)) then + if (iHxc==3) then i_k_bz_mem=PAR_K_scheme%bz_index(i_k_bz) if (i_p_bz_last/=i_p_bz.or.i_k_bz_last/=i_k_bz) then i_p_bz_last=i_p_bz @@ -551,7 +616,10 @@ subroutine K(iq,Ken,Xk,q,X,Xw,W_bss) i_k_sp_pol_v=BS_T_grp(i_Tgrp_k)%table(i_Tk,5) i_p_sp_pol_v=BS_T_grp(i_Tgrp_p)%table(i_Tp,5) ! - if ((BS_res_K_corr.or.BS_cpl_K_corr)) then + ig_kmq = qindx_X(iq,i_k_bz,2) + ig_pmq = qindx_X(iq,i_p_bz,2) + ! + if (iHxc==3) then i_kmq_bz_mem=PAR_K_scheme%bz_index(i_kmq_bz) if (i_pmq_bz_last/=i_pmq_bz.or.i_kmq_bz_last/=i_kmq_bz) then i_pmq_bz_last=i_pmq_bz @@ -581,14 +649,14 @@ subroutine K(iq,Ken,Xk,q,X,Xw,W_bss) ! endif ! - if (BS_K_is_ALDA.and.l_tddft_rsum) then + if (iHxc==2.and.l_tddft_rsum) then is_k = (/i_c_k,i_k,i_k_s,i_k_sp_pol_c/) os_k = (/i_v_k,i_kmq,i_kmq_s,i_k_sp_pol_v/) is_p = (/i_c_p,i_p,i_p_s,i_p_sp_pol_c/) os_p = (/i_v_p,i_pmq,i_pmq_s,i_p_sp_pol_v/) endif ! - if (BS_res_K_corr.or.BS_cpl_K_corr) then + if (iHxc==3) then ! if (iq_W_bz/=iq_W_bz_mq.or.iq_W/=iq_W_mq.or.iq_W_s/=iq_W_s_mq) call error("Wrong transferred momentum") ! @@ -609,18 +677,18 @@ subroutine K(iq,Ken,Xk,q,X,Xw,W_bss) !=================================== ! ! Exchange - if (BS_res_K_exchange) H_x=K_exchange_kernel_resonant(iq, BS_n_g_exch, & + if (iHxc==1) H_x=K_exchange_kernel_resonant(iq, BS_n_g_exch, & & BS_T_grp(i_Tgrp_p),i_Tp, BS_T_grp(i_Tgrp_k),i_Tk) ! ! ALDA - if (BS_K_is_ALDA) then + if (iHxc==2) then if(l_tddft_rsum) H_x=H_x+TDDFT_ALDA_eh_space_R_kernel(is_k,os_k,is_p,os_p,tddft_wf,'RES') if(l_tddft_gsum) H_x=H_x+TDDFT_ALDA_eh_space_G_kernel(BS_n_g_fxc,& & BS_T_grp(i_Tgrp_p),i_Tp, BS_T_grp(i_Tgrp_k),i_Tk) endif ! ! Correlations - if (BS_res_K_corr) then + if (iHxc==3) then if (l_std_alg) H_c=K_correlation_kernel_std(i_block,i_p,i_pmq, & & i_k_s,i_kp_s,i_c_k,i_c_p,i_kmq_s,i_kp_mq_s,i_v_k,i_v_p, & & i_kmq_t,i_pmq_t,i_k_sp_pol_c,i_p_sp_pol_c,i_k_sp_pol_v,i_p_sp_pol_v, & @@ -637,18 +705,18 @@ subroutine K(iq,Ken,Xk,q,X,Xw,W_bss) !============================= ! ! Exchange - if (BS_cpl_K_exchange) H_x=K_exchange_kernel_coupling(iq, BS_n_g_exch, & - & BS_T_grp(i_Tgrp_p),i_Tp, BS_T_grp(i_Tgrp_k),i_Tk) + if (iHxc==1) H_x=K_exchange_kernel_coupling(iq, BS_n_g_exch, & +& BS_T_grp(i_Tgrp_p),i_Tp, BS_T_grp(i_Tgrp_k),i_Tk) ! ! ALDA - if (BS_K_is_ALDA) then + if (iHxc==2) then if(l_tddft_rsum) H_x=H_x+TDDFT_ALDA_eh_space_R_kernel(is_k,os_k,is_p,os_p,tddft_wf,'CPL') if(l_tddft_gsum) H_x=H_x+TDDFT_ALDA_eh_space_G_kernel(BS_n_g_fxc,& & BS_T_grp(i_Tgrp_p),i_Tp, BS_T_grp(i_Tgrp_k),i_Tk) endif ! ! Correlations - if (BS_cpl_K_corr) then + if (iHxc==3) then if (l_std_alg) H_c=K_correlation_kernel_std(i_block,i_p,i_pmq, & & i_k_s,i_kp_s,i_c_k,i_v_p,i_kmq_s,i_kp_mq_s,i_v_k,i_c_p, & & i_kmq_t,i_pmq_t,i_k_sp_pol_c,i_p_sp_pol_v,i_k_sp_pol_v,i_p_sp_pol_c, & @@ -661,15 +729,18 @@ subroutine K(iq,Ken,Xk,q,X,Xw,W_bss) ! end select ! - if (l_tddft.and.WF_exx_fraction>0._SP) H_c=H_c*WF_exx_fraction + if (l_tddft.and.WF_exx_fraction>0._SP.and.iHxc==3) H_c=H_c*WF_exx_fraction ! sqrt_f_itk=sqrt(cmplx(BS_T_grp(i_Tgrp_k)%f(i_Tk),kind=SP)) sqrt_f_itp=sqrt(cmplx(BS_T_grp(i_Tgrp_p)%f(i_Tp),kind=SP)) ! - BS_blk(i_block)%mat(i_Tk,i_Tp)=sqrt_f_itk*(H_x*Co-H_c)*sqrt_f_itp - if (mode_now=="C") BS_blk(i_block)%mat(i_Tk,i_Tp)= cI*BS_blk(i_block)%mat(i_Tk,i_Tp) + BS_mat_tmp=sqrt_f_itk*(H_x*Co-H_c)*sqrt_f_itp + if (mode_now=="C") BS_mat_tmp= cI*BS_mat_tmp ! - BS_blk(i_block)%done(i_Tk,i_Tp)="t" + if ( l_matrix_init) BS_blk(i_block)%mat(i_Tk,i_Tp) = BS_mat_tmp + if (.not.l_matrix_init) BS_blk(i_block)%mat(i_Tk,i_Tp) = BS_blk(i_block)%mat(i_Tk,i_Tp)+BS_mat_tmp + ! + if (l_write_kernel) BS_blk(i_block)%done(i_Tk,i_Tp)="t" ! i_v_k=BS_T_grp(i_Tgrp_k)%table(i_Tk,2) i_c_k=BS_T_grp(i_Tgrp_k)%table(i_Tk,3) @@ -699,64 +770,45 @@ subroutine K(iq,Ken,Xk,q,X,Xw,W_bss) !$omp end parallel #endif ! - if(BS_K_cutoff>0._SP) BS_max_val_CPU(myid+1)=maxval( (/BS_max_val_CPU(myid+1),abs(BS_blk(i_block)%mat(:,:))/) ) + call timing('X T_space Kernel sum',OPR='stop') ! - call BS_oscillators_free(0,i_block) + if(BS_K_cutoff>0._SP.and.l_write_kernel) & +& BS_max_val_CPU(myid+1)=maxval( (/BS_max_val_CPU(myid+1),abs(BS_blk(i_block)%mat(:,:))/) ) ! - if(l_BSE_minimize_memory) then - call BS_oscillators_free(i_Tgrp_p,0) - call BS_oscillators_free(i_Tgrp_k,0) - if(l_tddft_gsum) then - call TDDFT_oscillators_free_L(i_Tgrp_k) - call TDDFT_oscillators_free_R(i_Tgrp_p) - endif + if(l_BSE_minimize_memory.and.iHxc==1) then + call BS_exchange_oscillators_free(i_Tgrp_k) + call BS_exchange_oscillators_free(i_Tgrp_p) endif ! + if(l_BSE_minimize_memory.and.iHxc==2.and.l_tddft_gsum) then + call TDDFT_oscillators_free_L(i_Tgrp_k) + call TDDFT_oscillators_free_R(i_Tgrp_p) + endif + ! + if(iHxc==3) call BS_correlation_oscillators_free(i_block,l_std_alg) + ! +100 if(l_write_kernel) then #if defined _PAR_IO - call io_control(ACTION=WR,ID=ID) - call io_BS_PAR_block(iq,i_block,ID,"full") + call io_control(ACTION=WR,ID=ID) + call io_BS_PAR_block(iq,i_block,ID,"full") #else - call io_control(ACTION=WR,COM=REP,SEC=(/i_block+1/),ID=ID) - io_BS_err=io_BS(iq,X,ID) + call io_control(ACTION=WR,COM=REP,SEC=(/i_block+1/),ID=ID) + io_BS_err=io_BS(iq,X,ID) #endif + endif ! call live_timing(steps=i_steps) ! enddo block_loop ! -#if defined _PAR_IO - if(BS_K_cutoff>0._SP) then - call PP_redux_wait(BS_max_val_CPU) - BS_max_val=maxval(BS_max_val_CPU) - call K_compress(iq,ID_compr) - call K_compressed_IO(iq,io_BS_err,"write") - endif - ! - call io_control(ACTION=WR_CL,COM=REP,ID=ID_head) - call io_control(ACTION=WR_CL,COM=REP,ID=ID) - call io_BS_PAR_free(ID_head,ID,"full",.true.) -#else - call io_control(ACTION=WR_CL,COM=REP,SEC=(/n_BS_blks+2/),ID=ID) - io_BS_err=io_BS(iq,X,ID) -#endif - ! - if (n_steps>0) call live_timing( ) + if (m_steps>0) call live_timing( ) ! ! GLOBAL CLEANING !================= ! - call BS_oscillators_free(BS_nT_grps,0) + if(iHxc==1) call BS_exchange_oscillators_free(BS_nT_grps) ! - if ( (BS_res_K_corr.or.BS_cpl_K_corr) .and. l_std_alg) then - YAMBO_FREE(WF_phase) - endif - ! - if (l_bs_fxc) then - ! - YAMBO_FREE(F_xc_gspace) - YAMBO_FREE(FXC_K_diagonal) - ! - else if (BS_K_is_ALDA) then + if(iHxc==2) then ! if (l_tddft_rsum) then YAMBO_FREE(tddft_wf%rhotwr1) @@ -769,16 +821,50 @@ subroutine K(iq,Ken,Xk,q,X,Xw,W_bss) YAMBO_FREE(tddft_wf%WF_symm1) YAMBO_FREE(tddft_wf%WF_symm2) ! - YAMBO_FREE(F_xc_mat) + if (l_bs_fxc) then + YAMBO_FREE(F_xc_gspace) + YAMBO_FREE(FXC_K_diagonal) + else if (BS_K_is_ALDA) then + YAMBO_FREE(F_xc_mat) + endif ! endif ! - if (any((/BS_res_K_corr,BS_cpl_K_corr/)).and..not.l_bs_fxc) qindx_tmp=qindx_B_close(qindx_ID,qindx_ID_frag) + if(iHxc==3) then + if (l_std_alg) then + YAMBO_FREE(WF_phase) + endif + qindx_tmp=qindx_B_close(qindx_ID,qindx_ID_frag) + YAMBO_FREE(BS_W) + YAMBO_FREE(RIM_W) + endif + ! + if ( (iHxc==1 .and. l_bs_exch_wf_in_loop) .or. (iHxc==2.and.l_bs_tddft_wf_in_loop.and.l_tddft_gsum)) then + call fft_free() + else + call WF_free(WF,keep_states_to_load=l_write_kernel) + endif + ! + l_matrix_init=.false. + ! + enddo ! iHxc loop + ! +#if defined _PAR_IO + if(BS_K_cutoff>0._SP) then + call PP_redux_wait(BS_max_val_CPU) + BS_max_val=maxval(BS_max_val_CPU) + call K_compress(iq,ID_compr) + call K_compressed_IO(iq,io_BS_err,"write") + endif ! - YAMBO_FREE(BS_W) - YAMBO_FREE(RIM_W) - call WF_free(WF) + call io_control(ACTION=WR_CL,COM=REP,ID=ID_head) + call io_control(ACTION=WR_CL,COM=REP,ID=ID) + call io_BS_PAR_free(ID_head,ID,"full",.true.) +#else + call io_control(ACTION=WR_CL,COM=REP,SEC=(/n_BS_blks+2/),ID=ID) + io_BS_err=io_BS(iq,X,ID) +#endif ! call timing('X T_space Kernel',OPR='stop') ! -end subroutine K +end subroutine K_kernel diff --git a/src/bse/K_observables.F b/src/bse/K_observables.F index e46ace829f..e5ad52a0ff 100644 --- a/src/bse/K_observables.F +++ b/src/bse/K_observables.F @@ -16,7 +16,7 @@ subroutine K_observables( W ) use BS_solvers, ONLY:run_Haydock,run_inversion,run_Diago,BSS_mode use BS, ONLY:BS_dip_size,l_BS_anomalous_Hall,l_BS_abs,l_BS_jdos,l_BS_trace,BSE_prop,& & l_BS_esort,l_BS_kerr,l_BS_magnons,l_BS_dichroism,l_BS_anomalous_Hall,l_BS_photolum,& -& l_BS_optics,BS_dip_size,l_BS_magnons,l_BS_photolum +& l_BS_optics,BS_dip_size,l_BS_magnons,l_BS_photolum,l_BS_kerr_asymm,l_BS_esort_indx use parser_m, ONLY:parser use stderr, ONLY:STRING_match use electrons, ONLY:levels,n_sp_pol,n_spin @@ -37,10 +37,13 @@ subroutine K_observables( W ) l_BS_kerr = STRING_match(BSE_prop,'kerr') l_BS_photolum = STRING_match(BSE_prop,'photolum') l_BS_magnons = STRING_match(BSE_prop,'magn') .and.n_spin>1 + l_BS_kerr_asymm= STRING_match(BSE_prop,'asymm') .and. l_BS_kerr + l_BS_anomalous_Hall = STRING_match(BSE_prop,'anHall') .and. l_BS_kerr ! ! How ... !--------- l_BS_esort = STRING_match(BSE_prop,'esrt') + l_BS_esort_indx= STRING_match(BSE_prop,'indx').and.l_BS_esort ! ! How to treat dipoles... !------------------------ @@ -55,16 +58,44 @@ subroutine K_observables( W ) call warning(" n_sp_pol=2 and magnons requested. Spin flip BSE only is computed") endif if (l_BS_kerr) then - call parser('AnHall',l_BS_anomalous_Hall) + if(.not.l_BS_anomalous_Hall) call parser('AnHall',l_BS_anomalous_Hall) BS_dip_size=2 l_BS_abs=.TRUE. endif if (l_BS_photolum) global_gauge="velocity" + if ((l_BS_photolum.or.l_BS_kerr).and.l_BS_trace) then + call warning(" Trace not compatible with kerr or pl. Switching it off") + l_BS_trace = .false. + endif + if (l_BS_dichroism.and..not.l_BS_trace) then + BS_dip_size=2 + call warning(" Natural dichroism without trace") + ! NB: in the case without trace the quadripolar terms should be taken into account, + ! With trace these terms average to zero + ! call warning(" Trace is automatically switched on with dichroism") + ! l_BS_trace=.true. + endif + ! if (l_BS_trace ) BS_dip_size = 3 if (l_BS_trace .and. .not.l_rpa_IP) call error(" trace coded only at the IP level") ! l_BS_optics=l_BS_abs.or.l_BS_kerr.or.l_BS_dichroism.or.l_BS_photolum ! + ! Now fix BSE_prop string + ! + BSE_prop="" + if(l_BS_abs) BSE_prop=trim(BSE_prop)//' abs' + if(l_BS_jdos) BSE_prop=trim(BSE_prop)//' jdos' + if(l_BS_dichroism) BSE_prop=trim(BSE_prop)//' dich' + if(l_BS_kerr) BSE_prop=trim(BSE_prop)//' kerr' + if(l_BS_kerr_asymm) BSE_prop=trim(BSE_prop)//' asymm' + if(l_BS_anomalous_Hall) BSE_prop=trim(BSE_prop)//' anHall' + if(l_BS_photolum) BSE_prop=trim(BSE_prop)//' photolum' + if(l_BS_magnons) BSE_prop=trim(BSE_prop)//' magn' + if(l_BS_esort) BSE_prop=trim(BSE_prop)//' esrt' + if(l_BS_esort) BSE_prop=trim(BSE_prop)//' indx' + if(l_BS_trace) BSE_prop=trim(BSE_prop)//' trace' + ! ! Solver Logicals !================= run_Diago = STRING_match(BSS_mode,'d') diff --git a/src/bse/K_solvers.F b/src/bse/K_solvers.F index 7e6ac48041..9408d1313f 100644 --- a/src/bse/K_solvers.F +++ b/src/bse/K_solvers.F @@ -15,7 +15,7 @@ subroutine K_solvers(iq,Ken,Xk,q,X_static,W_bss) use BS_solvers, ONLY:BSS_desc,BSS_free,BS_mat,run_Haydock,& & run_inversion,run_Diago,run_Slepc use BS, ONLY:BS_K_has_been_calculated_loaded,& -& BS_blks_free,l_BS_abs,l_BS_kerr,l_BS_magnons + & BS_blks_free,l_BS_abs,l_BS_kerr,l_BS_magnons,l_BS_dichroism use parallel_m, ONLY:master_cpu,CPU_structure use IO_int, ONLY:IO_and_Messaging_switch use linear_algebra, ONLY:INV,DIAGO @@ -95,12 +95,14 @@ subroutine K_solvers(iq,Ken,Xk,q,X_static,W_bss) ! The jdos (l_BS_jdos) cannot be computed easely via the Haydock solver. ! it could be obtained as the average of N calls to K_Haydock ! with random intial vectors. See https://doi.org/10.1002/nla.2170 + ! ! A possible alternative could be to use the algorithm with Cf ! from both the left and the right hand side to get from a single Haydock loop ! if (l_BS_abs) call K_Haydock(iq,W_bss,"optics",Xk) if (l_BS_kerr) call K_Haydock(iq,W_bss,"kerr",Xk) if (l_BS_magnons) call K_Haydock(iq,W_bss,"magnons",Xk) + if (l_BS_dichroism) call K_Haydock(iq,W_bss,"dichroism",Xk) ! endif ! diff --git a/src/bz_ops/bz_samp_indexes.F b/src/bz_ops/bz_samp_indexes.F index 30c24261f4..2e40b7695f 100644 --- a/src/bz_ops/bz_samp_indexes.F +++ b/src/bz_ops/bz_samp_indexes.F @@ -94,7 +94,7 @@ subroutine bz_samp_indexes(en,k,Xk,q) ! if(l_nl_optics) then X_scattering =.FALSE. - sigma_scattering=.FALSE. + ! sigma_scattering=.FALSE. ! I need them for the LSEX endif ! #endif diff --git a/src/collisions/.objects b/src/collisions/.objects index 2d8cc5b306..22ead9636e 100644 --- a/src/collisions/.objects +++ b/src/collisions/.objects @@ -9,6 +9,8 @@ RT_objs = COLLISIONS_compose_rt.o #endif #if defined _NL NL_objs = COLLISIONS_compose_nl.o COLLISIONS_compress.o +LSEX_objs = Build_LSEX_collisions.o LSEX_potential.o OSCLL_eval.o OSCLL_load.o \ + OSCLL_compose_collision.o OSCLL_compose_nl.o OSCLL_compose_vbands.o #endif #if defined _QED || defined _SC || defined _RT || defined _NL objs= PLASMA_parallel_setup.o COLLISIONS_basic_operations.o \ @@ -16,5 +18,5 @@ objs= PLASMA_parallel_setup.o COLLISIONS_basic_operations.o \ COLLISIONS_alloc_and_free.o COLLISIONS_eval.o COLLISIONS_load.o \ COLLISIONS_linearize_and_IO.o COLLISIONS_map_to_QP_table.o \ COLLISIONS_NEQ_GW_static.o COLLISIONS_HXC.o SCATTERING_GW_kinematics.o \ - $(SC_objs) $(QED_objs) $(RT_objs) $(NL_objs) + $(SC_objs) $(QED_objs) $(RT_objs) $(NL_objs) $(LSEX_objs) #endif diff --git a/src/collisions/Build_LSEX_collisions.F b/src/collisions/Build_LSEX_collisions.F new file mode 100644 index 0000000000..4b77cebc17 --- /dev/null +++ b/src/collisions/Build_LSEX_collisions.F @@ -0,0 +1,82 @@ +! +! Copyright (C) 2000-2022 the LUMEN team +! http://www.yambo-code.org +! +! Authors (see AUTHORS file for details): CA +! +! This file is distributed under the terms of the GNU +! General Public License. You can redistribute it and/or +! modify it under the terms of the GNU General Public +! License as published by the Free Software Foundation; +! either version 2, or (at your option) any later version. +! +! This program is distributed in the hope that it will +! be useful, but WITHOUT ANY WARRANTY; without even the +! implied warranty of MERCHANTABILITY or FITNESS FOR A +! PARTICULAR PURPOSE. See the GNU General Public License +! for more details. +! +! You should have received a copy of the GNU General Public +! License along with this program; if not, write to the Free +! Software Foundation, Inc., 59 Temple Place - Suite 330,Boston, +! MA 02111-1307, USA or visit http://www.gnu.org/copyleft/gpl.txt. +! +subroutine Build_LSEX_collisions(k,q,i_qp,COLL) + ! + ! This subroutine calculate the long-range part of the Screened exchange + ! + use electrons, ONLY:n_sp_pol + use pars, ONLY:SP,cI,cZERO + use collision_ext, ONLY:OSCLL,W,ng_oscll,COLL_bands + use QP_m, ONLY:QP_table,QP_nk,QP_n_states + use R_lattice, ONLY:bz_samp,qindx_S + ! +#include + ! + type(bz_samp), intent(in) :: k,q + integer, intent(in) :: i_qp + ! + ! Work Space + ! + integer :: i_mp ! self-energy external indexes + integer :: i_np,iqbz,i_kmq ! self-energy internal indexes + integer :: ig1,ig2 + integer :: i_n,i_m + integer :: i_k + ! + ! Collisions output + ! + complex(SP), intent(out) :: COLL(COLL_bands(1):COLL_bands(2),COLL_bands(1):COLL_bands(2),QP_nk) + ! + ! Coll indexes + ! + i_n=QP_table(i_qp,1) + i_m=QP_table(i_qp,2) + i_k=QP_table(i_qp,3) + ! + ! Build Collisions from the Oscillators + ! + ! COLL(n,m,n',m',k,k-q) = \sum_{G,G'} \rho(n,n',k,q,G')*\rho^*(m,m',k,q,G)*W(q,G',G) + ! + do i_mp=COLL_bands(1),COLL_bands(2) + do i_np=COLL_bands(1),COLL_bands(2) + ! + COLL(i_np,i_mp,:)=cZERO + ! + do iqbz=1,q%nbz + i_kmq = k%sstar(qindx_S(i_k,iqbz,1),1) + ! + ! Build Collision on the fly + ! + do ig1=1,ng_oscll + do ig2=1,ng_oscll + COLL(i_np,i_mp,i_kmq)=COLL(i_np,i_mp,i_kmq)+OSCLL(i_n,i_np,i_k,iqbz,ig2) & +& *conjg(OSCLL(i_m,i_mp,i_k,iqbz,ig1))*W(iqbz,ig2,ig1) + enddo + enddo + ! + enddo + enddo + enddo + ! +end subroutine Build_LSEX_collisions diff --git a/src/collisions/COLLISIONS_load.F b/src/collisions/COLLISIONS_load.F index c681b7db0f..0d039f513e 100644 --- a/src/collisions/COLLISIONS_load.F +++ b/src/collisions/COLLISIONS_load.F @@ -124,8 +124,8 @@ subroutine COLLISIONS_load(CHECK_EXISTENCE_ONLY) ! if (l_use_COH_collisions.and.l_sc_sex.and..not.l_use_Hxc_collisions) l_error=.TRUE. ! - if (l_error) call error( ' HXC collisions not found / not compatible') - if (l_warning) call warning(' HXC collisions not found. Potential is computed on the fly.') + if (l_error) call error( ' HXC collisions not found/not compatible') + if (l_warning) call warning(' HXC collisions not found/not compatible. Potential is computed on the fly.') ! l_use_scatt_collisions=all(io_err(3:4)==0).and.(l_elphoton_scatt.or.l_elphoton_corr.or.l_elel_scatt) if (io_err(3)/=0) call error(' el-el scatt collisions not found / not compatible') diff --git a/src/collisions/DOUBLE_project.dep b/src/collisions/DOUBLE_project.dep index bcfe075e27..342b1d24b0 100644 --- a/src/collisions/DOUBLE_project.dep +++ b/src/collisions/DOUBLE_project.dep @@ -1,3 +1,4 @@ + Build_LSEX_collisions.o COLLISIONS_HXC.o COLLISIONS_NEQ_GW_static.o COLLISIONS_alloc_and_free.o @@ -5,11 +6,18 @@ COLLISIONS_compose_nl.o COLLISIONS_compose_rt.o COLLISIONS_compose_sc.o + COLLISIONS_compress.o COLLISIONS_eval.o COLLISIONS_linearize_and_IO.o COLLISIONS_load.o COLLISIONS_map_to_QP_table.o COLLISIONS_momentum.o + LSEX_potential.o + OSCLL_compose_collision.o + OSCLL_compose_nl.o + OSCLL_compose_vbands.o + OSCLL_eval.o + OSCLL_load.o PLASMA_build_up.o PLASMA_parallel_setup.o PLASMA_tables_and_dimensions.o diff --git a/src/collisions/LSEX_potential.F b/src/collisions/LSEX_potential.F new file mode 100644 index 0000000000..2b8e5c8cc3 --- /dev/null +++ b/src/collisions/LSEX_potential.F @@ -0,0 +1,138 @@ +! +! Copyright (C) 2000-2022 the YAMBO team +! http://www.yambo-code.org +! +! Authors (see AUTHORS file for details): CA +! +! This file is distributed under the terms of the GNU +! General Public License. You can redistribute it and/or +! modify it under the terms of the GNU General Public +! License as published by the Free Software Foundation; +! either version 2, or (at your option) any later version. +! +! This program is distributed in the hope that it will +! be useful, but WITHOUT ANY WARRANTY; without even the +! implied warranty of MERCHANTABILITY or FITNESS FOR A +! PARTICULAR PURPOSE. See the GNU General Public License +! for more details. +! +! You should have received a copy of the GNU General Public +! License along with this program; if not, write to the Free +! Software Foundation, Inc., 59 Temple Place - Suite 330,Boston, +! MA 02111-1307, USA or visit http://www.gnu.org/copyleft/gpl.txt. +! +subroutine LSEX_potential(q,X,Xw) + ! + ! Construct the screeend potential W(G=G'=0,q) + ! + use nl_optics, ONLY:NL_bands,Correlation,LSEX + use collision_ext, ONLY:W + use LIVE_t, ONLY:live_timing + use pars, ONLY:SP,cZERO,pi + use frequency, ONLY:w_samp,W_reset + use R_lattice, ONLY:bz_samp,qindx_S + use D_lattice, ONLY:DL_vol + use electrons, ONLY:levels,spin_occ + use collision_el, ONLY:elemental_collision,elemental_collision_free, & +& elemental_collision_alloc + use X_m, ONLY:X_mat,X_ALLOC_elemental,X_t + use IO_int, ONLY:io_control + use IO_m, ONLY:OP_RD_CL,REP,VERIFY,NONE,RD_CL,OP_RD,RD_CL_IF_END,manage_action, & +& deliver_IO_error_message + use matrix_operate, ONLY:hermitian + use collision_ext, ONLY:ng_oscll + use stderr, ONLY:intc + use parallel_int, ONLY:PP_wait + ! +#include + ! + type(bz_samp), intent(in) :: q + type(w_samp) :: Xw + type(X_t) :: X + ! + ! Work Space + ! + integer :: iqbz,iqibz,iqs,iqref + integer :: io_err,ID,ig1,ig2,IO_ACT + integer, external :: io_X + type(elemental_collision) :: isc + ! + call PP_wait() + ! + ! The number of G in X(q,G,G') is read from input + ! + X%ng =ng_oscll + isc%ngrho =X%ng + isc%iqref =0 + ! + call elemental_collision_free(isc) + call elemental_collision_alloc(isc,NG=X%ng,NG_GAMP=(/X%ng,X%ng/),TITLE="Wlr") + ! + call X_ALLOC_elemental('X',(/X%ng,X%ng,1/)) + ! + call live_timing('[NL] W potential :',q%nbz) + ! + if(Correlation==LSEX) then + call io_control(ACTION=OP_RD,COM=REP,SEC=(/1/),ID=ID) + io_err=io_X(X,Xw,ID) + if (io_err<0) call error('Incomplete and/or broken tatic diel. fun. database') + endif + ! + YAMBO_ALLOC(W,(q%nbz,ng_oscll,ng_oscll)) + iqref=0 + ! +!$OMP WORKSHARE + W=cZERO +!$OMP END WORKSHARE + ! + do iqbz=1,q%nbz + ! + isc%qs(2:)=(/q%sstar(iqbz,1),q%sstar(iqbz,2)/) + iqibz=isc%qs(2) + iqs =isc%qs(3) + ! + call scatter_Gamp(isc,'x') + ! + ! The bare exchange v + ! +!$OMP WORKSHARE + forall(ig1=1:X%ng) + W(iqbz,ig1,ig1)=-4._SP/spin_occ*pi*isc%gamp(ig1,ig1) + end forall +!$OMP END WORKSHARE + ! + if(Correlation==LSEX) then + ! + call scatter_Gamp(isc,'c') + ! + if(iqibz/=iqref) then + ! + IO_ACT=manage_action(RD_CL_IF_END,iqibz,1,q%nibz) + call io_control(ACTION=IO_ACT,COM=NONE,SEC=(/2*iqibz,2*iqibz+1/),ID=ID) + io_err=io_X(X,Xw,ID) + ! + call deliver_IO_error_message(io_err,'PP/Em1s',STOP_it=.TRUE.) + iqref=iqibz + ! + endif + ! + ! Screened part of the exchange \tilde W = W - v + ! +!$OMP WORKSHARE + forall(ig1=1:X%ng,ig2=1:X%ng) + W(iqbz,ig1,ig2)=W(iqbz,ig1,ig2)-4._SP/spin_occ*pi*X_mat(ig1,ig2,1)*isc%gamp(ig1,ig2) + end forall +!$OMP END WORKSHARE + ! + endif + ! + call live_timing(steps=1) + ! + enddo + ! + call live_timing() + ! + call elemental_collision_free(isc) + call X_ALLOC_elemental('X') + ! +end subroutine LSEX_potential diff --git a/src/collisions/NL_project.dep b/src/collisions/NL_project.dep index f93c8bbabd..6174cfc54c 100644 --- a/src/collisions/NL_project.dep +++ b/src/collisions/NL_project.dep @@ -1,12 +1,20 @@ + Build_LSEX_collisions.o COLLISIONS_HXC.o COLLISIONS_NEQ_GW_static.o COLLISIONS_alloc_and_free.o COLLISIONS_basic_operations.o COLLISIONS_compose_nl.o + COLLISIONS_compress.o COLLISIONS_eval.o COLLISIONS_linearize_and_IO.o COLLISIONS_load.o COLLISIONS_map_to_QP_table.o + LSEX_potential.o + OSCLL_compose_collision.o + OSCLL_compose_nl.o + OSCLL_compose_vbands.o + OSCLL_eval.o + OSCLL_load.o PLASMA_build_up.o PLASMA_parallel_setup.o PLASMA_tables_and_dimensions.o diff --git a/src/collisions/OSCLL_compose_collision.F b/src/collisions/OSCLL_compose_collision.F new file mode 100644 index 0000000000..70ef858e7e --- /dev/null +++ b/src/collisions/OSCLL_compose_collision.F @@ -0,0 +1,103 @@ +! +! Copyright (C) 2020-2023 the Yambo team +! +! Authors (see AUTHORS file for details): CA +! +! This file is distributed under the terms of the GNU +! General Public License. You can redistribute it and/or +! modify it under the terms of the GNU General Public +! License as published by the Free Software Foundation; +! either version 2, or (at your option) any later version. +! +! This program is distributed in the hope that it will +! be useful, but WITHOUT ANY WARRANTY; without even the +! implied warranty of MERCHANTABILITY or FITNESS FOR A +! PARTICULAR PURPOSE. See the GNU General Public License +! for more details. +! +! You should have received a copy of the GNU General Public +! License along with this program; if not, write to the Free +! Software Foundation, Inc., 59 Temple Place - Suite 330,Boston, +! MA 02111-1307, USA or visit http://www.gnu.org/copyleft/gpl.txt. +! +subroutine OSCLL_compose_collision(k,q,i_n,i_m,i_k,COLL) + ! + ! This subroutine use long-range part of the Screened exchange + ! and the oscillators to reconstruct the collisions + ! (at present used only for testing purpose) + ! + use pars, ONLY:SP,cZERO + use nl_optics, ONLY:NL_bands,Correlation,LSEX,LHF + use collision_ext, ONLY:OSCLL,W,ng_oscll + use QP_m, ONLY:QP_nk + use R_lattice, ONLY:bz_samp,qindx_S + use parallel_m, ONLY:PAR_Xk_ibz_index + use wrapper, ONLY:V_dot_V,M_by_V +#if defined _TIMING + use timing_m, ONLY:timing +#endif + ! + implicit none + ! + type(bz_samp), intent(in) :: k,q + ! + ! Collision indexes + integer, intent(in) :: i_n,i_m,i_k + ! + ! Work Space + ! + integer :: ibp ! self-energy external indexes + integer :: ib,iqbz,i_kmq ! self-energy internal indexes + integer :: ig1,ig2,ik_mem + complex(SP) :: V_tmp(ng_oscll) + ! + ! Collisions output + ! + complex(SP), intent(out) :: COLL(NL_bands(1):NL_bands(2),NL_bands(1):NL_bands(2),QP_nk) + ! +#if defined _TIMING + call timing('OSCLL compose',OPR='start') +#endif + ! + ik_mem=PAR_Xk_ibz_index(i_k) + ! + ! Build Collisions from the Oscillators + ! + COLL=cZERO + ! + do iqbz=1,q%nbz + i_kmq = k%sstar(qindx_S(i_k,iqbz,1),1) + ! + ! Build Collision on the fly + ! + if(Correlation==LSEX) then + ! + do ig1=1,ng_oscll + do ig2=1,ng_oscll +!$OMP WORKSHARE + forall(ibp=NL_bands(1):NL_bands(2),ib=NL_bands(1):NL_bands(2)) + COLL(ib,ibp,i_kmq)=COLL(ib,ibp,i_kmq)+OSCLL(i_n,ib ,ik_mem,iqbz,ig2)*conjg(OSCLL(i_m,ibp,ik_mem,iqbz,ig1)) & +& *W(iqbz,ig2,ig1) + end forall +!$OMP END WORKSHARE + enddo + enddo + ! + elseif(Correlation==LHF) then + do ig1=1,ng_oscll +!$OMP WORKSHARE + forall(ibp=NL_bands(1):NL_bands(2),ib=NL_bands(1):NL_bands(2)) + COLL(ib,ibp,i_kmq)=COLL(ib,ibp,i_kmq)+OSCLL(i_n,ib ,ik_mem,iqbz,ig1)*conjg(OSCLL(i_m,ibp,ik_mem,iqbz,ig1)) & +& *W(iqbz,ig1,ig1) + end forall +!$OMP END WORKSHARE + enddo + endif + ! + enddo ! loop in iqbz + ! +#if defined _TIMING + call timing('OSCLL compose',OPR='stop') +#endif + ! +end subroutine OSCLL_compose_collision diff --git a/src/collisions/OSCLL_compose_nl.F b/src/collisions/OSCLL_compose_nl.F new file mode 100644 index 0000000000..dfcd65b474 --- /dev/null +++ b/src/collisions/OSCLL_compose_nl.F @@ -0,0 +1,89 @@ +! +! Copyright (C) 2020-2023 the Yambo team +! +! Authors (see AUTHORS file for details): CA +! +! This file is distributed under the terms of the GNU +! General Public License. You can redistribute it and/or +! modify it under the terms of the GNU General Public +! License as published by the Free Software Foundation; +! either version 2, or (at your option) any later version. +! +! This program is distributed in the hope that it will +! be useful, but WITHOUT ANY WARRANTY; without even the +! implied warranty of MERCHANTABILITY or FITNESS FOR A +! PARTICULAR PURPOSE. See the GNU General Public License +! for more details. +! +! You should have received a copy of the GNU General Public +! License along with this program; if not, write to the Free +! Software Foundation, Inc., 59 Temple Place - Suite 330,Boston, +! MA 02111-1307, USA or visit http://www.gnu.org/copyleft/gpl.txt. +! +subroutine OSCLL_compose_nl(dG,H_nl_sc,k,q,E,i_k,i_sp_pol) + ! + ! This subroutine calculate the long-range part of the Screened exchange + ! + use electrons, ONLY:n_sp_pol,levels,spin_occ + use pars, ONLY:SP,cI,cZERO,cONE + use FFT_m, ONLY:fft_size + use nl_optics, ONLY:NL_bands,NL_nbands + use collision_ext, ONLY:ng_oscll,OSCLL,W + use QP_m, ONLY:QP_nk + use R_lattice, ONLY:bz_samp,qindx_S,nXkibz + use wrapper_omp, ONLY:V_dot_V + use parallel_m, ONLY:PAR_Xk_ibz_index +#if defined _TIMING + use timing_m, ONLY:timing +#endif + ! + implicit none + ! + complex(SP), intent(inout) :: H_nl_sc(NL_bands(1):NL_bands(2),NL_bands(1):NL_bands(2)) + integer, intent(in) :: i_k,i_sp_pol + type(bz_samp), intent(in) :: k,q + type(levels), intent(in) :: E + complex(SP), intent(in) :: dG(NL_bands(1):NL_bands(2),NL_bands(1):NL_bands(2),QP_nk) + ! + ! Work Space + ! + complex(SP) :: COLL(NL_bands(1):NL_bands(2),NL_bands(1):NL_bands(2),QP_nk) +#if defined _DOUBLE + complex(SP), external :: ZDOTU +#else + complex(SP), external :: CDOTU +#endif + integer :: i1,i2,i_n,i_m + ! +#if defined _TIMING + call timing('OSCLL compose NL',OPR='start') +#endif + ! + do i_n=NL_bands(1),NL_bands(2) + do i_m=i_n,NL_bands(2) + ! + call OSCLL_compose_collision(k,q,i_n,i_m,i_k,COLL) + ! +#if defined _DOUBLE + H_nl_sc(i_n,i_m)=H_nl_sc(i_n,i_m)-cI*ZDOTU(NL_nbands**2*QP_nk,COLL,1,dG,1) +#else + H_nl_sc(i_n,i_m)=H_nl_sc(i_n,i_m)-cI*CDOTU(NL_nbands**2*QP_nk,COLL,1,dG,1) +#endif + ! + enddo + enddo + ! + ! Symmetrization + ! + do i_n=NL_bands(1),NL_bands(2) + do i_m=i_n+1,NL_bands(2) + H_nl_sc(i_m,i_n)=conjg(H_nl_sc(i_n,i_m)) + enddo + H_nl_sc(i_n,i_n)=real(H_nl_sc(i_n,i_n)) + enddo + ! +#if defined _TIMING + call timing('OSCLL compose NL',OPR='stop') +#endif + ! +end subroutine OSCLL_compose_nl diff --git a/src/collisions/OSCLL_compose_vbands.F b/src/collisions/OSCLL_compose_vbands.F new file mode 100644 index 0000000000..d83534cdf4 --- /dev/null +++ b/src/collisions/OSCLL_compose_vbands.F @@ -0,0 +1,123 @@ +! +! Copyright (C) 2020-2023 the Yambo team +! +! Authors (see AUTHORS file for details): CA +! +! This file is distributed under the terms of the GNU +! General Public License. You can redistribute it and/or +! modify it under the terms of the GNU General Public +! License as published by the Free Software Foundation; +! either version 2, or (at your option) any later version. +! +! This program is distributed in the hope that it will +! be useful, but WITHOUT ANY WARRANTY; without even the +! implied warranty of MERCHANTABILITY or FITNESS FOR A +! PARTICULAR PURPOSE. See the GNU General Public License +! for more details. +! +! You should have received a copy of the GNU General Public +! License along with this program; if not, write to the Free +! Software Foundation, Inc., 59 Temple Place - Suite 330,Boston, +! MA 02111-1307, USA or visit http://www.gnu.org/copyleft/gpl.txt. +! +subroutine OSCLL_compose_vbands(V_input,H_nl_sc,k,q,E,i_k,i_sp_pol) + ! + ! This subroutine calculate the long-range part of the Screened exchange + ! + use electrons, ONLY:n_sp_pol,levels,spin_occ + use pars, ONLY:SP,cI,cZERO,cONE + use FFT_m, ONLY:fft_size + use nl_optics, ONLY:NL_bands,NL_nbands,Sigma_SEX_EQ + use collision_ext, ONLY:ng_oscll,OSCLL,W + use QP_m, ONLY:QP_nk + use R_lattice, ONLY:bz_samp,qindx_S,nXkibz + use wrapper_omp, ONLY:Vstar_dot_V_omp + use parallel_m, ONLY:PAR_Xk_ibz_index +#if defined _TIMING + use timing_m, ONLY:timing +#endif + ! + implicit none + ! + complex(SP), intent(inout) :: H_nl_sc(NL_bands(1):NL_bands(2),NL_bands(1):NL_bands(2)) + integer, intent(in) :: i_k,i_sp_pol + type(bz_samp), intent(in) :: k,q + type(levels), intent(in) :: E + complex(SP), intent(in) :: V_input(NL_bands(2),maxval(E%nbf),QP_nk,n_sp_pol) + ! + ! Work Space + ! + complex(SP) :: COLL(NL_bands(1):NL_bands(2),NL_bands(1):NL_bands(2),QP_nk) + complex(SP) :: V_kmq(NL_bands(1):NL_bands(2),maxval(E%nbf),q%nbz) + complex(SP) :: OSCLL_val(maxval(E%nbf),NL_bands(1):NL_bands(2),ng_oscll,q%nbz) + complex(SP) :: OSCLL_sum(q%nbz,ng_oscll,ng_oscll) + ! +#if defined _DOUBLE + complex(SP), external :: ZDOTU +#else + complex(SP), external :: CDOTU +#endif + complex(SP) :: COLL_val + integer :: i_kmq,i_n,i_m,iv,iqbz,ig1,ig2,ik_mem + ! + ik_mem=PAR_Xk_ibz_index(i_k) + ! +#if defined _TIMING + call timing('OSCLL compose NL',OPR='start') +#endif + ! +!$omp parallel do default(shared), private(iqbz,i_kmq) + do iqbz=1,q%nbz + i_kmq = k%sstar(qindx_S(i_k,iqbz,1),1) + V_kmq(:,:,iqbz) = V_input(NL_bands(1):,:,i_kmq,i_sp_pol) + enddo +!$omp end parallel do + ! + ! Build Oscillators for the time-dependent valence bands + ! +!$OMP WORKSHARE +forall(iv=1:E%nbf(i_sp_pol),iqbz=1:q%nbz,i_n=NL_bands(1):NL_bands(2),ig1=1:ng_oscll) + OSCLL_val(iv,i_n,ig1,iqbz)=sum(OSCLL(i_n,:,ik_mem,iqbz,ig1)*V_kmq(NL_bands(1):,iv,iqbz)) + end forall +!$OMP END WORKSHARE + ! + ! Remove equilibrium Sigma-sex + ! +!$OMP WORKSHARE + H_nl_sc=H_nl_sc-Sigma_SEX_EQ(:,:,i_sp_pol,ik_mem) +!$OMP END WORKSHARE + ! + do i_n=NL_bands(1),NL_bands(2) + do i_m=i_n,NL_bands(2) + ! +!$OMP WORKSHARE + forall(iqbz=1:q%nbz,ig1=1:ng_oscll,ig2=1:ng_oscll) + OSCLL_sum(iqbz,ig1,ig2)=dot_product(OSCLL_val(:,i_m,ig2,iqbz),OSCLL_val(:,i_n,ig2,iqbz)) + end forall +!$OMP END WORKSHARE + ! +#if defined _DOUBLE + H_nl_sc(i_n,i_m)=H_nl_sc(i_n,i_m)+spin_occ*ZDOTU(q%nbz*ng_oscll**2,OSCLL_sum,1,W,1) +#else + H_nl_sc(i_n,i_m)=H_nl_sc(i_n,i_m)+spin_occ*CDOTU(q%nbz*ng_oscll**2,OSCLL_sum,1,W,1) +#endif + ! + enddo + enddo + ! + ! Symmetrization + ! +!$omp parallel do default(shared), private(i_n,i_m) + do i_n=NL_bands(1),NL_bands(2) + do i_m=i_n+1,NL_bands(2) + H_nl_sc(i_m,i_n)=conjg(H_nl_sc(i_n,i_m)) + enddo + H_nl_sc(i_n,i_n)=real(H_nl_sc(i_n,i_n)) + enddo +!$omp end parallel do + ! +#if defined _TIMING + call timing('OSCLL compose NL',OPR='stop') +#endif + ! +end subroutine OSCLL_compose_vbands diff --git a/src/collisions/OSCLL_eval.F b/src/collisions/OSCLL_eval.F new file mode 100644 index 0000000000..3de61a57da --- /dev/null +++ b/src/collisions/OSCLL_eval.F @@ -0,0 +1,215 @@ +! +! Copyright (C) 2000-2022 the YAMBO team +! http://www.yambo-code.org +! +! Authors (see AUTHORS file for details): CA +! +! This file is distributed under the terms of the GNU +! General Public License. You can redistribute it and/or +! modify it under the terms of the GNU General Public +! License as published by the Free Software Foundation; +! either version 2, or (at your option) any later version. +! +! This program is distributed in the hope that it will +! be useful, but WITHOUT ANY WARRANTY; without even the +! implied warranty of MERCHANTABILITY or FITNESS FOR A +! PARTICULAR PURPOSE. See the GNU General Public License +! for more details. +! +! You should have received a copy of the GNU General Public +! License along with this program; if not, write to the Free +! Software Foundation, Inc., 59 Temple Place - Suite 330,Boston, +! MA 02111-1307, USA or visit http://www.gnu.org/copyleft/gpl.txt. +! +subroutine OSCLL_eval(q,k,en) + ! + ! Evaluate oscillators for the Local-SEX + ! + use LIVE_t, ONLY:live_timing + use pars, ONLY:SP,cZERO + use R_lattice, ONLY:bz_samp,qindx_S + use electrons, ONLY:levels,n_sp_pol + use wave_func, ONLY:WF,wf_ng + use interfaces, ONLY:WF_load,WF_free + use QP_m, ONLY:QP_nk + use stderr, ONLY:intc + use collision_el, ONLY:elemental_collision,elemental_collision_free, & +& elemental_collision_alloc + use collision_ext, ONLY:COLL_bands,W,OSCLL_k,ng_oscll + use parallel_m, ONLY:PAR_IND_Xk_ibz,PAR_IND_Xk_ibz_ID + use FFT_m, ONLY:fft_size + use IO_int, ONLY:io_control,IO_and_Messaging_switch + use IO_m, ONLY:OP_RD_CL,VERIFY,REP,OP_WR_CL,OP_APP_CL,file_is_present,rm_file,DUMP, & +& OP_IF_START_APP_CL_IF_END,manage_action + use com, ONLY:msg + use parallel_int, ONLY:PP_redux_wait,PP_wait + use parallel_m, ONLY:PAR_IND_Xk_ibz,PAR_COM_Xk_ibz_INDEX,master_cpu,myid,PAR_IND_G_b,master_cpu, & +& PAR_Xk_nibz,PAR_Xk_ibz_index,PAR_COM_Xk_ibz_A2A,PAR_COM_G_b_INDEX,myid + ! +#include +#include + ! + type(bz_samp), intent(in) :: q,k + type(levels), intent(in) :: en + ! + ! Work Space + ! + type(elemental_collision) :: isc + integer :: i_k,i_m,i_n,i_q,iqbz,i_kmq_G0,iqs,i_kmq,i_kmq_s,i_k_s,ik_mem + integer :: i_k_sp,i_kmq_sp,iqibz,ig,i_np,i_mp,io,ic,i_sp_pol,ib_idx + integer :: ID,io_err,iGo_max,i_fragment,max_count,i_count,IO_ACT + integer :: COLL_nbands,OSCLL_perc + logical, allocatable :: OSCLL_done(:) + integer, external :: io_OSCLL + logical :: IO_do_it + ! + call PP_wait() + ! + i_sp_pol=1 + ! + call IO_and_Messaging_switch("SAVE") + call IO_and_Messaging_switch("+io_in",CONDITION=.TRUE.) + ! + call msg('s','[OSC] Checking oscillators header ') + call io_control(ACTION=OP_RD_CL,COM=REP,SEC=(/1/),MODE=VERIFY,ID=ID) + io_err=io_OSCLL(q,ID) + ! + YAMBO_ALLOC(OSCLL_done,(QP_nk)) + OSCLL_done=.FALSE. + if(io_err==0) then + ! + ! Check if all oscillators has been evaluated + ! + if(master_cpu) then + ! + do i_k=1,QP_nk + if(file_is_present('OSCLL_fragment_'//intc(i_k))) OSCLL_done(i_k)=.TRUE. + enddo + ! + endif + ! + call PP_redux_wait(OSCLL_done,imode=3) + ! + if(all(OSCLL_done.eqv..TRUE.)) then + return + else + OSCLL_perc=nint(count(OSCLL_done)/real(QP_nk)*100) + call msg('s','Restarting Oscillators calculation : '//trim(intc(OSCLL_perc))//'% done') + endif + ! + else + call msg('s','[OSC] Missing or broken oscillators ') + endif + ! + ! Oscillators not found, recalculated them + ! + if(master_cpu) then + call io_control(ACTION=OP_WR_CL,COM=REP,SEC=(/1/),MODE=VERIFY,ID=ID) + io_err=io_OSCLL(q,ID) + endif + ! + max_count=120 + do while(io_err/=0) + call io_control(ACTION=OP_RD_CL,COM=REP,SEC=(/1/),MODE=DUMP,ID=ID) + io_err=io_OSCLL(q,ID) + if(io_err/=0) call sleep(1) + i_count=i_count+1 + if((i_count==max_count).and.io_err/=0) & +& call error(" [OSCLL] Header not found while writing dipoles ") + enddo + ! + call PP_wait() + ! + iGo_max=maxval(qindx_S(:,:,2)) + call WF_load(WF,wf_ng,iGo_max,COLL_bands,(/1,k%nibz/),space='R',title='-OSC') + ! + call elemental_collision_free(isc) + ! + isc%iqref =0 + isc%ngrho =ng_oscll ! only G=1 is required for local-SEX + ! + call elemental_collision_alloc(isc,NG=isc%ngrho,TITLE="OSCLL") + ! + call live_timing('[OSC] Oscillators :',PAR_IND_Xk_ibz%n_of_elements(PAR_IND_Xk_ibz_ID+1)) + ! + YAMBO_ALLOC(OSCLL_k,(COLL_bands(1):COLL_bands(2),COLL_bands(1):COLL_bands(2),q%nbz,ng_oscll)) + ! + COLL_nbands=COLL_bands(2)-COLL_bands(1)+1 + ! + do i_k=1,QP_nk + ! + if (.not.PAR_IND_Xk_ibz%element_1D(i_k)) cycle + ! + if(OSCLL_done(i_k)) then + call live_timing(steps=1) + cycle + endif + + ! + ik_mem=PAR_Xk_ibz_index(i_k) + ! + OSCLL_k=cZERO + ! + do i_n=COLL_bands(1),COLL_bands(2) + do i_m=COLL_bands(1),COLL_bands(2) + ! + ib_idx=i_m-COLL_bands(1)+1+(i_n-COLL_bands(1))*COLL_nbands + ! + if (.not.PAR_IND_G_b%element_1D(ib_idx)) cycle + ! + do iqbz=1,q%nbz + ! + i_kmq_G0 =qindx_S(i_k,iqbz,2) + iqibz =q%sstar(iqbz,1) + iqs =q%sstar(iqbz,2) + isc%qs =(/i_kmq_G0,iqibz,iqs/) + ! + i_k_s=1 + i_kmq = k%sstar(qindx_S(i_k,iqbz,1),1) + i_kmq_s = k%sstar(qindx_S(i_k,iqbz,1),2) + ! + ! Spin-Polarization not supported!! + ! + i_k_sp = 1 + i_kmq_sp= 1 + ! + ! + isc%is =(/i_n,i_k,i_k_s,i_k_sp/) + isc%os =(/i_m,i_kmq,i_kmq_s,i_kmq_sp/) + call DEV_SUB(scatter_Bamp)(isc) + ! +!$omp parallel do default(shared), private(ig) + do ig=1,ng_oscll + OSCLL_k(i_n,i_m,iqbz,ig)=isc%rhotw(ig) + enddo +!$omp end parallel do + ! + enddo + ! + enddo + enddo + ! + call PP_redux_wait(OSCLL_k(:,:,:,:),COMM=PAR_COM_G_b_INDEX%COMM) + ! + i_fragment=i_k+(i_sp_pol-1)*k%nibz + ! + IO_ACT=OP_APP_CL !manage_action(OP_IF_START_APP_CL_IF_END,i_fragment,1,k%nibz*n_sp_pol) + call io_control(ACTION=IO_ACT,COM=REP,SEC=(/1+i_fragment/),MODE=VERIFY,ID=ID) + io_err=io_OSCLL(q,ID) + ! + call live_timing(steps=1) + ! + enddo + ! + call IO_and_Messaging_switch("RESTORE") + ! + call live_timing() + ! + YAMBO_FREE(OSCLL_k) + ! + call elemental_collision_free(isc) + call WF_free(WF) + ! + call PP_wait() + ! +end subroutine OSCLL_eval diff --git a/src/collisions/OSCLL_load.F b/src/collisions/OSCLL_load.F new file mode 100644 index 0000000000..ae5a78d733 --- /dev/null +++ b/src/collisions/OSCLL_load.F @@ -0,0 +1,80 @@ +! +! Copyright (C) 2000-2022 the YAMBO team +! http://www.yambo-code.org +! +! Authors (see AUTHORS file for details): CA +! +! This file is distributed under the terms of the GNU +! General Public License. You can redistribute it and/or +! modify it under the terms of the GNU General Public +! License as published by the Free Software Foundation; +! either version 2, or (at your option) any later version. +! +! This program is distributed in the hope that it will +! be useful, but WITHOUT ANY WARRANTY; without even the +! implied warranty of MERCHANTABILITY or FITNESS FOR A +! PARTICULAR PURPOSE. See the GNU General Public License +! for more details. +! +! You should have received a copy of the GNU General Public +! License along with this program; if not, write to the Free +! Software Foundation, Inc., 59 Temple Place - Suite 330,Boston, +! MA 02111-1307, USA or visit http://www.gnu.org/copyleft/gpl.txt. +! +subroutine OSCLL_load(q,k) + ! + ! Evaluate oscillators for the Local-SEX + ! + use LIVE_t, ONLY:live_timing + use R_lattice, ONLY:bz_samp + use QP_m, ONLY:QP_nk + use parallel_m, ONLY:PAR_IND_Xk_ibz,PAR_IND_Xk_ibz_ID,myid,PAR_COM_Xk_ibz_INDEX,master_cpu + use parallel_int, ONLY:PP_wait + use IO_m, ONLY:OP_RD,RD_CL_IF_END,VERIFY,REP,manage_action,deliver_IO_error_message + use IO_int, ONLY:io_control,IO_and_Messaging_switch + use com, ONLY:msg + ! +#include + ! + type(bz_samp), intent(in) :: q,k + ! + ! Work Space + ! + integer :: i_k,i_sp_pol + integer :: ID,io_err,IO_ACT + integer, external :: io_OSCLL + ! + call PP_wait(COMM=PAR_COM_Xk_ibz_INDEX%COMM) + ! + i_sp_pol=1 + ! + call IO_and_Messaging_switch("SAVE") + call IO_and_Messaging_switch("+io_in",CONDITION=.TRUE.) + ! + call msg('s','[OSC] Checking oscillators header ') + call io_control(ACTION=OP_RD,COM=REP,SEC=(/1/),MODE=VERIFY,ID=ID) + io_err=io_OSCLL(q,ID) + ! + if(io_err/=0) & +& call error('Oscillators not present, evaluate them before non-linear reponse: "yambo -e -v lsex" ') + ! + ! Load Oscillators + ! + call live_timing('[OSC] Load Oscillators :',PAR_IND_Xk_ibz%n_of_elements(PAR_IND_Xk_ibz_ID+1)) + ! + do i_k=1,QP_nk + if (.not.PAR_IND_Xk_ibz%element_1D(i_k)) cycle + IO_ACT=manage_action(RD_CL_IF_END,i_k,1,(i_sp_pol)*k%nibz) + call io_control(ACTION=IO_ACT,COM=REP,SEC=(/1+i_k+(i_sp_pol-1)*k%nibz/),ID=ID) + io_err=io_OSCLL(q,ID) + call deliver_IO_error_message(io_err,'Oscillators',STOP_it=.TRUE.) + call live_timing(steps=1) + enddo + call live_timing() + ! + call PP_wait(COMM=PAR_COM_Xk_ibz_INDEX%COMM) + call IO_and_Messaging_switch("RESTORE") + ! + call msg('rs','[OSC] Oscillators loaded') + ! +end subroutine OSCLL_load diff --git a/src/common/eval_G_minus_G.F b/src/common/eval_G_minus_G.F index 52383bedad..1e8ee8e9d3 100644 --- a/src/common/eval_G_minus_G.F +++ b/src/common/eval_G_minus_G.F @@ -18,7 +18,7 @@ integer function eval_G_minus_G(iG,iGo,COMM) use vec_operate, ONLY:iku_v_norm use parallel_m, ONLY:yMPI_comm,PP_indexes,PP_indexes_reset,myid use parallel_int, ONLY:PP_redux_wait,PARALLEL_index,PARALLEL_live_message - use R_lattice, ONLY:G_m_G,g_vec,ng_in_shell,n_g_shells,E_of_shell + use R_lattice, ONLY:G_m_G,g_vec,ng_in_shell,n_g_shells,E_of_shell,G_m_G_maxval #ifdef _CUDA use R_lattice, ONLY:G_m_G_d #endif @@ -34,6 +34,7 @@ integer function eval_G_minus_G(iG,iGo,COMM) integer :: iG_shell,iGo_shell,iG_shell_0,iG_shell_max,iG_,iGo_,iG_alloc,iGo_alloc real(SP):: E_iG, E_iGo, E_max real(SP):: v1(3) + integer, allocatable :: G_m_G_maxval_tmp(:) real(SP), allocatable :: E_G_m_G(:,:) type(PP_indexes) :: PAR_IND_G ! @@ -146,6 +147,9 @@ integer function eval_G_minus_G(iG,iGo,COMM) ! YAMBO_ALLOC(G_m_G,(iG_,iGo_)) G_m_G=0 + YAMBO_ALLOC(G_m_G_maxval_tmp,(iGo_)) + G_m_G_maxval_tmp=0 + G_m_G_maxval=0 ! !$omp parallel do default(shared), private(i2,i1,v1), schedule(dynamic) do i2=1,iGo_ @@ -159,11 +163,15 @@ integer function eval_G_minus_G(iG,iGo,COMM) ! v1(:)=g_vec(i1,:)-g_vec(i2,:) G_m_G(i1,i2)=G_index(v1,.false.) + G_m_G_maxval_tmp(i2)=max(G_m_G_maxval_tmp(i2),G_m_G(i1,i2)) ! enddo enddo !$omp end parallel do - ! + ! + G_m_G_maxval=maxval(G_m_G_maxval_tmp) + YAMBO_FREE(G_m_G_maxval_tmp) + ! if (present(COMM)) call PP_redux_wait(G_m_G,COMM=COMM%COMM) ! call PP_indexes_reset(PAR_IND_G) diff --git a/src/driver/PHEL_project.dep b/src/driver/PHEL_project.dep deleted file mode 100644 index e91fccffa5..0000000000 --- a/src/driver/PHEL_project.dep +++ /dev/null @@ -1,3 +0,0 @@ - options_projects.o - options_yambo.o - diff --git a/src/el-ph/RT_project.dep b/src/el-ph/RT_project.dep index bbc20f4200..2455333893 100644 --- a/src/el-ph/RT_project.dep +++ b/src/el-ph/RT_project.dep @@ -1,6 +1,2 @@ - ELPH_acoustic_phonon_properties.o - ELPH_databases_check.o - ELPH_databases_grids_map.o - ELPH_databases_load.o - ELPH_databases_symmetrize.o + ELPH_Sigma_c.o diff --git a/src/hamiltonian/V_Hartree.F b/src/hamiltonian/V_Hartree.F index ada08038dd..37e5c6c9f8 100644 --- a/src/hamiltonian/V_Hartree.F +++ b/src/hamiltonian/V_Hartree.F @@ -35,9 +35,11 @@ subroutine V_Hartree(rho,vhr) ! call timing('V_Hartree',OPR='start') ! +!$OMP WORKSHARE vhg = (0._DP,0._DP) vhtmp = (0._DP,0._DP) rhog = rho +!$OMP END WORKSHARE ! ! FFT call: rho in reciprocal space ! @@ -71,7 +73,9 @@ subroutine V_Hartree(rho,vhr) call fft_3d(vhtmp,fft_dim,1) #endif ! +!$OMP WORKSHARE vhr(:,1) = real(vhtmp,SP) + !$OMP END WORKSHARE if(n_spin==2) vhr(:,2)=vhr(:,1) ! call timing('V_Hartree',OPR='stop') diff --git a/src/hamiltonian/XC_potentials.F b/src/hamiltonian/XC_potentials.F index 928e5cd2fb..772f133d07 100644 --- a/src/hamiltonian/XC_potentials.F +++ b/src/hamiltonian/XC_potentials.F @@ -10,7 +10,8 @@ subroutine XC_potentials(POTENTIALS_string) use pars, ONLY:schlen use drivers, ONLY:l_sc_ip,l_sc_hartree,l_sc_exx,l_sc_fock,l_sc_coh,l_sc_sex,l_sc_is_libDFT,& & l_sc_srpa,l_use_Hxc_collisions,l_use_COH_collisions,l_eval_collisions - use global_XC, ONLY:H_SE_EXX,H_SE_FOCK,H_SE_COH,H_SE_SEX,H_SE_EXXC,H_SE_SRPA,H_SE_HARTREE + use global_XC, ONLY:H_SE_EXX,H_SE_FOCK,H_SE_COH,H_SE_SEX,H_SE_EXXC,H_SE_SRPA,H_SE_HARTREE, & +& H_SE_LSEX,H_SE_LHF use hamiltonian, ONLY:H_kind,H_xc_functional,l_sc_XC_is_local,l_sc_V_is_local use xc_functionals, ONLY:XC_LDA_X,XC_LDA_C_PZ,XC_EXCHANGE_CORRELATION,XC_EXCHANGE,& & GS_xc_KIND,GS_xc_FUNCTIONAL @@ -140,6 +141,14 @@ subroutine XC_potentials(POTENTIALS_string) l_use_Hxc_collisions=.TRUE. endif H_kind=H_kind+H_SE_SEX + else if (STRING_same(trim(potentials(i_c)),'LSEX')) then + l_sc_V_is_local=.false. + l_sc_XC_is_local=.false. + H_kind=H_kind+H_SE_LSEX + else if (STRING_same(trim(potentials(i_c)),'LHF')) then + l_sc_V_is_local=.false. + l_sc_XC_is_local=.false. + H_kind=H_kind+H_SE_LHF else if (STRING_same(trim(potentials(i_c)),'COHSEX')) then l_sc_coh=.true. l_sc_sex=.true. diff --git a/src/interface/INIT.F b/src/interface/INIT.F index 2b27665b4f..4a0270d393 100644 --- a/src/interface/INIT.F +++ b/src/interface/INIT.F @@ -482,10 +482,6 @@ subroutine LOCAL_setup_before_input_IO() end subroutine LOCAL_setup_before_input_IO ! subroutine LOCAL_after_input_IO - ! - ! q0 renormalization - ! - BSS_q0(:) = BSS_q0(:)*q0_def_norm/v_norm( BSS_q0) ! if (len_trim(BSE_mode)==0 ) BSE_mode="retarded" if (STRING_same(trim(BSE_mode),"causal") ) BSE_mode="retarded" diff --git a/src/interface/INIT_activate.F b/src/interface/INIT_activate.F index e3c52c1184..b4a7293536 100644 --- a/src/interface/INIT_activate.F +++ b/src/interface/INIT_activate.F @@ -104,7 +104,8 @@ subroutine INIT_activate() if (l_nl_optics) then call CPU_activate("NL") call CPU_activate("DIP") - call initactivate(1,'DIP_Threads NL_Threads') + call CPU_activate("OSCLL") + call initactivate(1,'DIP_Threads NL_Threads OSCLL_Threads') endif #endif ! @@ -122,7 +123,7 @@ subroutine INIT_activate() ! !Setup ! - if (l_setup) call initactivate(1,'MaxGvecs Gthresh K_grids IkSigLim IkXLim NoDiagSC') + if (l_setup) call initactivate(1,'MaxGvecs Gthresh K_grids IkSigLim IkXLim NoDiagSC EvalMagn') #if defined _ELPH if (l_setup) call initactivate(1,'BSEscatt') #endif @@ -205,7 +206,7 @@ subroutine INIT_activate() ! ! Dipoles & Gauges ! - call initactivate(1,'DipApproach DipComputed DipPDirect ShiftedPaths Gauge AnHall') + call initactivate(1,'DipApproach DipComputed DipPDirect ShiftedPaths Gauge') ! endif ! @@ -227,7 +228,8 @@ subroutine INIT_activate() ! Special case: The BSE equation is used to build up the BSE_Fxc kernel. if (l_bs_fxc) then call initactivate(1,'FxcGRLc FxcSVdig FxcRetarded FxcMEStps') - call initactivate(1,'BLongDir BEnRange BDmRange BEnSteps') + call initactivate(1,'BLongDir QPropDir BEnRange BDmRange BEnSteps') + !call initactivate(1,'EFieldDir QPropDir BEnRange BDmRange BEnSteps') endif ! endif @@ -243,7 +245,7 @@ subroutine INIT_activate() call INIT_RT_ctl_switch('R') #endif ! - call initactivate(1,'NoCondSumRule MetDamp AnHall') + call initactivate(1,'NoCondSumRule MetDamp') ! #if defined _NL || defined _SC call initactivate(1,'EvPolarization FrSndOrd') @@ -265,7 +267,8 @@ subroutine INIT_activate() ! if (l_optics.and.l_bse.and.l_bss.or.(l_bse.and.l_rpa_IP)) then ! - call initactivate(1,'BEnRange BDmRange BDmERef BEnSteps BLongDir') + call initactivate(1,'BEnRange BDmRange BDmERef BEnSteps BLongDir QPropDir') + !call initactivate(1,'BEnRange BDmRange BDmERef BEnSteps EFieldDir QPropDir') call initactivate(1,'BSEprop BSEdips') ! ! Special case: the BSE_Fxc kernel has been constructed @@ -549,9 +552,10 @@ subroutine INIT_activate() #if defined _NL if (l_nl_optics) then call initactivate(1,'FFTGvecs NLBands NLverbosity NLstep NLtime NLintegrator NLCorrelation NLLrcAlpha') + call initactivate(1,'TestOSCLL') if(.not.l_nl_p_and_p) call initactivate(1,'NLEnRange NLEnSteps NLrotaxis NLAngSteps') call initactivate(1,'NLDamping RADLifeTime UseDipoles FrSndOrd NoComprCOLL EvalCurrent InducedField FrPolPerdic') - call initactivate(1,'Gauge RADLifeTime HARRLvcs EXXRLvcs') + call initactivate(1,'TestOSCLL Gauge RADLifeTime HARRLvcs EXXRLvcs CORRLvcs') call init_QP_ctl_switch('G') endif ! diff --git a/src/interface/INIT_load.F b/src/interface/INIT_load.F index 54a3779ebb..fd1f595448 100644 --- a/src/interface/INIT_load.F +++ b/src/interface/INIT_load.F @@ -35,7 +35,7 @@ subroutine INIT_load(defs,en,q,k,X,Xw,Dip) use BS, ONLY:BSE_mode,BSE_prop,BSK_mode,BS_eh_en,BS_eh_win,BS_q,BS_bands,& & BS_n_g_W,BS_n_g_exch,BS_n_g_fxc,BSE_L_kind,BS_K_cutoff,BSK_IO_mode,BSE_dipole_geometry use BS_solvers, ONLY:BSS_mode,Haydock_threshold,Haydock_iterIO,BSS_n_freqs,& -& BSS_dr,BSS_er,BSS_q0,BSS_damp_reference,BSS_inversion_mode,& +& BSS_dr,BSS_er,BSS_E_dir,BSS_Q_dir,BSS_damp_reference,BSS_inversion_mode,& & BSS_Wd,K_INV_EPS,K_INV_PL,BSS_n_eig,Haydock_iterMAX #if defined _SLEPC && !defined _NL use BS_solvers, ONLY:BSS_slepc_target_E,BSS_slepc_extraction,BSS_slepc_ncv,BSS_slepc_tol,BSS_slepc_maxit,& @@ -52,7 +52,7 @@ subroutine INIT_load(defs,en,q,k,X,Xw,Dip) #endif #if defined _SC || defined _RT || defined _QED use hamiltonian, ONLY:H_ref_bands,H_potential - use collision_ext, ONLY:COLLISIONS_cutoff,COLL_bands + use collision_ext, ONLY:COLLISIONS_cutoff,COLL_bands,ng_oscll #endif use QP_m, ONLY:SC_E_threshold #if defined _SC @@ -94,7 +94,8 @@ subroutine INIT_load(defs,en,q,k,X,Xw,Dip) & RES_tresh,FAN_deltaE_treshold,DW_deltaE_treshold,E_kpq_sh_fact,gkkp_db #endif #if defined _OPENMP - use openmp, ONLY:n_threads_X,n_threads_SE,n_threads_RT,n_threads_DIP,n_threads_K,n_threads_NL + use openmp, ONLY:n_threads_X,n_threads_SE,n_threads_RT,n_threads_DIP,n_threads_K,n_threads_NL, & +& n_threads_OSCLL #endif #if defined _SCALAPACK use SLK_m, ONLY:SLK_test_H_dim @@ -208,6 +209,7 @@ subroutine INIT_load(defs,en,q,k,X,Xw,Dip) call it(defs,'K_Threads', '[OPENMP/BSK] Number of threads for response functions',n_threads_K) call it(defs,'X_Threads', '[OPENMP/X] Number of threads for response functions',n_threads_X) call it(defs,'DIP_Threads', '[OPENMP/X] Number of threads for dipoles',n_threads_DIP) + call it(defs,'OSCLL_Threads', '[OPENMP/X] Number of threads for Oscillators',n_threads_OSCLL) call it(defs,'SE_Threads', '[OPENMP/GW] Number of threads for self-energy',n_threads_SE) call it(defs,'RT_Threads', '[OPENMP/RT] Number of threads for real-time',n_threads_RT) call it(defs,'NL_Threads', '[OPENMP/NL] Number of threads for nl-optics',n_threads_NL) @@ -296,12 +298,14 @@ subroutine INIT_load(defs,en,q,k,X,Xw,Dip) call it('f',defs,'NoCondSumRule' ,'[BSE/X] Do not impose the conductivity sum rule in velocity gauge',verb_level=V_resp) call it('f',defs,'MetDamp' , '[BSE] Define '//slash//'w+=sqrt('//slash//'w*('//slash//'w+i'//slash//'eta))',verb_level=V_resp) call it(defs,'BSSmod', '[BSS] (h)aydock/(d)iagonalization/(s)lepc/(i)nversion/(t)ddft`',BSS_mode,protect=.FALSE.) - call it(defs,'BSEprop', '[BSS] Can be any among abs/jdos/kerr/magn/dich/photolum/esrt',BSE_prop) + call it(defs,'BSEprop', '[BSS] Can be any among abs/jdos/kerr/asymm/anHAll/magn/dich/photolum/esrt',BSE_prop) call it(defs,'BSEdips', '[BSS] Can be "trace/none" or "xy/xz/yz" to define off-diagonal rotation plane',BSE_dipole_geometry) call it(defs,'BSSInvMode','[BSS] Inversion solver modality `(f)ull/(p)erturbative`',BSS_inversion_mode) call it(defs,'BSSInvPFratio','[BSS] Inversion solver. Ratio between the number of frequencies solved pert/full',& & K_INV_EPS%PERT_FULL_ratio) - call it(defs,'BLongDir', '[BSS] [cc] Electric Field',BSS_q0) + call it(defs,'BLongDir', '[BSS] [cc] Electric Field versor',BSS_E_dir) + !call it(defs,'EFieldDir', '[BSS] [cc] Electric Field versor',BSS_E_dir) + call it(defs,'QPropDir', '[BSS] [cc] Propagation versor Field',BSS_Q_dir,verb_level=V_resp) call it(defs,'BEnRange', '[BSS] Energy range',BSS_er,E_unit) call it(defs,'BDmRange', '[BSS] Damping range',BSS_dr,E_unit) call it(defs,'BSHayTrs', '[BSS] Relative [o/o] Haydock threshold. Strict(>0)/Average(<0)',Haydock_threshold) @@ -356,7 +360,6 @@ subroutine INIT_load(defs,en,q,k,X,Xw,Dip) ! ! BSE properties: kerr, magnons, absorption, ... ! - call it('f',defs,'AnHall' , '[BSE] Add the anomalous Hall effect to eps if using length gauge',verb_level=V_resp) call it(defs,'PL_weights','[PL] [cc] Weights of the carthesian components of the emitted radiation',PL_weights,verb_level=V_resp) ! #if defined _RT @@ -412,7 +415,7 @@ subroutine INIT_load(defs,en,q,k,X,Xw,Dip) ! call it(defs,'TwoAlpha', '[RT] C_nk ~ alpha*Gamma_nk^2 ',two_alpha,Verb_level=V_real_time) call it(defs,'GrKind', '[RT] G-ret kind: Lorentzian (QP)/ Hyperbolic QP_secant (HS)',Gr_kind,Verb_level=V_real_time) - call it(defs,'RADLifeTime','[RT] Radiative life-time (if negative Yambo sets it equal to Phase_LifeTime in NL)', & + call it(defs,'RADLifeTime','[RT] Radiative life-time (if negative RADLifeTime=Phase_LifeTime)', & & RAD_LifeTime,unit=Time_unit(1)) call it(defs,'RADmagnific','[RT] Radiative life-time magnification',RAD_magnification,Verb_level=V_real_time) call it(defs,'PhLifeTime', '[RT] Constant Dephasing Time',Phase_LifeTime,unit=Time_unit(1)) @@ -513,7 +516,7 @@ subroutine INIT_load(defs,en,q,k,X,Xw,Dip) call it(defs,'NLstep', '[NL] Time step length',RT_step,unit=Time_unit(1),Verb_level=V_nl_optics) call it(defs,'NLtime', '[NL] Simulation Time',NE_tot_time,unit=Time_unit(1)) call it(defs,'NLintegrator', '[NL] Integrator ("EULEREXP/RK2/RK4/RK2EXP/HEUN/INVINT/CRANKNIC")',Integrator_name) - call it(defs,'NLCorrelation','[NL] Correlation ("IPA/HARTREE/TDDFT/LRC/LRW/JGM/SEX") ',NL_correlation) + call it(defs,'NLCorrelation','[NL] Correlation ("IPA/HARTREE/TDDFT/LRC/LRW/JGM/SEX/LSEX/LHF") ',NL_correlation) call it(defs,'NLLrcAlpha', '[NL] Long Range Correction',NL_LRC_alpha) call it(defs,'NLDamping', '[NL] Damping (or dephasing)',NL_damping,unit=E_unit) call it(defs,'NLEnRange', '[NL] Energy range (for loop on frequencies NLEnSteps/=0',NL_er,E_unit) @@ -521,6 +524,7 @@ subroutine INIT_load(defs,en,q,k,X,Xw,Dip) call it(defs,'NLrotaxis', '[NL] Rotation axis (for the loop on angles NLAngSteps/=0)',NL_rot_axis) call it(defs,'NLEnSteps', '[NL] Energy steps for the loop on frequencies',n_frequencies) call it('f',defs,'UseDipoles','[NL] Use Covariant Dipoles (just for test purpose)',verb_level=V_nl_optics) + call it('f',defs,'TestOSCLL','[NL] Test Oscillators vs Full Collisions [for testing purposes]',verb_level=V_nl_optics) call it('f',defs,'FrSndOrd','[NL] Force second order in Covariant Dipoles',verb_level=V_nl_optics) call it('f',defs,'NoComprCOLL','[NL] Load collisions in double-precision and do not remove small elements (default false)',& & verb_level=V_nl_optics) @@ -782,6 +786,7 @@ subroutine CPU_structure_load() if (i_s==8) possible_fields='(w,k) ' ! NL if (i_s==9) possible_fields='(k) ' ! BZ if (i_s==10)possible_fields='(q,k,b) ' ! PH_SE + if (i_s==11)possible_fields='(k) ' ! OSCLL ! if (len_trim(CPU_structure(i_s)%Short_Description)==0) cycle name=trim(CPU_structure(i_s)%Short_Description)//"_CPU" diff --git a/src/io/.objects b/src/io/.objects index 6962cdbb0c..ca96eba112 100644 --- a/src/io/.objects +++ b/src/io/.objects @@ -15,6 +15,10 @@ ELPH_objects = variables_ELPH.o io_ELPH.o io_gFsq.o #if defined _PHEL PHEL_objects = io_PH.o #endif +COLL_objects = +#if defined _SC || _RT +COLL_objects = io_OSCLL.o +#endif objs = ver_is_gt_or_eq.o \ io_Vnl.o io_E_SOC_map.o \ io_RIM.o io_RIM_W.o io_GROT.o $(ELPH_objects) $(PHEL_objects) \ @@ -26,4 +30,4 @@ objs = ver_is_gt_or_eq.o \ io_header.o io_Double_Grid.o \ io_COL_CUT.o io_DB1_selective_scan.o variables_X.o variables_DIPOLES.o \ io_out_of_date.o \ - $(SC_objects) $(RT_objects) $(NL_objects) + $(SC_objects) $(RT_objects) $(NL_objects) $(COLL_objects) diff --git a/src/io/DOUBLE_project.dep b/src/io/DOUBLE_project.dep index 50ca185728..6600258148 100644 --- a/src/io/DOUBLE_project.dep +++ b/src/io/DOUBLE_project.dep @@ -17,6 +17,7 @@ io_MPA.o io_NL.o io_NLCC_pwscf.o + io_OSCLL.o io_PH.o io_QINDX.o io_QP_and_GF.o diff --git a/src/io/RT_project.dep b/src/io/RT_project.dep index a4625c321c..544ef7e355 100644 --- a/src/io/RT_project.dep +++ b/src/io/RT_project.dep @@ -1,5 +1,6 @@ io_DB1.o io_KB_real_space.o + io_OSCLL.o io_RT_components.o io_RT_components_G_lesser.o io_RT_components_OBS.o diff --git a/src/io/SC_project.dep b/src/io/SC_project.dep index 863d79b747..09f8e8df32 100644 --- a/src/io/SC_project.dep +++ b/src/io/SC_project.dep @@ -1,5 +1,6 @@ io_DB1.o io_KB_real_space.o + io_OSCLL.o io_SC_components.o load_SC_components.o variables_SC.o diff --git a/src/io/io_BSS_diago.F b/src/io/io_BSS_diago.F index 05e263801c..600244a95e 100644 --- a/src/io/io_BSS_diago.F +++ b/src/io/io_BSS_diago.F @@ -6,7 +6,7 @@ ! Authors (see AUTHORS file for details): AM ! integer function io_BSS_diago(iq,i_BS_mat,ID,X_static,bsE,bsRl,bsRr,BsE_corr,& -& bsL_magn,bsR_magn,bsR_kerr,bsR_pl) +& bsL_magn,bsR_magn,bsL_kerr,bsR_kerr,bsR_dich,bsR_pl) ! use pars, ONLY:SP,schlen,IP use stderr, ONLY:intc @@ -25,7 +25,7 @@ integer function io_BSS_diago(iq,i_BS_mat,ID,X_static,bsE,bsRl,bsRr,BsE_corr,& integer :: iq,ID,i_BS_mat complex(SP) , optional :: bsE(:) real(SP) ,pointer, optional :: BsE_corr(:,:),bsR_pl(:,:) - complex(SP),pointer, optional :: bsR_kerr(:),bsL_magn(:,:),bsR_magn(:,:),bsRl(:),bsRr(:) + complex(SP),pointer, optional :: bsL_kerr(:),bsR_kerr(:),bsR_dich(:,:),bsL_magn(:,:),bsR_magn(:,:),bsRl(:),bsRr(:) ! ! Work Space ! @@ -127,6 +127,13 @@ integer function io_BSS_diago(iq,i_BS_mat,ID,X_static,bsE,bsRl,bsRr,BsE_corr,& endif endif ! + if(present(bsR_dich)) then + if(associated(bsR_dich)) then + call io_bulk(ID,'BS_DICH_Residuals',VAR_SZ=(/2,BSS_n_eig,2/)) + call io_bulk(ID,C2=bsR_dich) + endif + endif + ! if(present(bsR_pl)) then if(associated(bsR_pl)) then call io_bulk(ID,'BS_PL_Residuals',VAR_SZ=(/2,BSS_n_eig/)) diff --git a/src/io/io_OSCLL.F b/src/io/io_OSCLL.F new file mode 100644 index 0000000000..1907ba342f --- /dev/null +++ b/src/io/io_OSCLL.F @@ -0,0 +1,141 @@ +! +! Copyright (C) 2000-2022 the YAMBO team +! http://www.yambo-code.org +! +! Authors (see AUTHORS file for details): CA +! +! This file is distributed under the terms of the GNU +! General Public License. You can redistribute it and/or +! modify it under the terms of the GNU General Public +! License as published by the Free Software Foundation; +! either version 2, or (at your option) any later version. +! +! This program is distributed in the hope that it will +! be useful, but WITHOUT ANY WARRANTY; without even the +! implied warranty of MERCHANTABILITY or FITNESS FOR A +! PARTICULAR PURPOSE. See the GNU General Public License +! for more details. +! +! You should have received a copy of the GNU General Public +! License along with this program; if not, write to the Free +! Software Foundation, Inc., 59 Temple Place - Suite 330,Boston, +! MA 02111-1307, USA or visit http://www.gnu.org/copyleft/gpl.txt. +! +integer function io_OSCLL(q,ID) + ! + use pars, ONLY:SP,schlen,IP_YIO + use electrons, ONLY:levels + use matrix_operate,ONLY:mat_c2r,mat_r2c + use R_lattice, ONLY:nXkibz,bz_samp + use QP_m, ONLY:QP_nk + use IO_int, ONLY:io_connect,io_disconnect,io_header,io_fragment, & +& def_variable_bulk,io_variable_bulk,io_variable_elemental,def_variable_elemental + use IO_m, ONLY:io_sec,frag_DIP,read_is_on,write_is_on,io_extension,& +& RD_CL_IF_END,RD_CL,io_action,IO_NOT_ALLOWED,io_DIP, & +& io_status,io_PAR_cpu + use collision_ext, ONLY:COLL_bands,OSCLL,OSCLL_k,ng_oscll + use parallel_m, ONLY:PAR_Xk_ibz_index,PAR_Xk_nibz +#if defined _TIMING + use timing_m, ONLY:timing +#endif + ! +#include + ! + integer ::ID + type(bz_samp), intent(in) :: q + ! + ! Work Space + ! + integer :: ik,ik_mem,i_fragment,COLL_nbands,i_sp_pol,iqbz,ig,ib,ID_frag + character(schlen) :: VAR_name + real(SP),allocatable :: disk_data(:,:,:) + ! +#if defined _TIMING + call timing('io_OSCLL',OPR='start') +#endif + ! + ID_frag=0 + io_OSCLL=0 + ! + io_extension(ID)='OSCLL' + io_OSCLL=io_connect(desc=trim(io_extension(ID)),type=2,ID=ID,FORCE_READ_MODE=(.not.any((/io_sec(ID,:)==1/)))) + ! + if (io_OSCLL/=0) goto 1 + ! + if (any((/io_sec(ID,:)==1/))) then + ! + io_OSCLL=io_header(ID,R_LATT=.true.,WF=.true.,IMPOSE_SN=.true.) + if (io_OSCLL/=0) goto 1 + ! + ! Variable for the Oscillators + ! + call def_variable_elemental(ID,"ng_oscll",1,IP_YIO,0) + call io_variable_elemental(ID,VAR="RL vectors in oscillators",I0=ng_oscll,CHECK=.true.,OP=(/"=="/)) + ! + call def_variable_elemental(ID,"COLL_bands",2,IP_YIO,0) + call io_variable_elemental(ID,VAR="Oscillators bands range",I1=COLL_bands,CHECK=.true.,OP=(/"==","=="/)) + ! + io_OSCLL=io_status(ID) + if (io_OSCLL/=0) goto 1 + ! + endif + ! + i_fragment=maxval(io_sec(ID,:))-1 + ! + if(i_fragment<=nXkibz) then + i_sp_pol=1 + ik=i_fragment + else + i_sp_pol=2 + ik=i_fragment-nXkibz + endif + ! + if (ik==0) goto 1 + ! + ik_mem=PAR_Xk_ibz_index(ik) + ! + COLL_nbands=COLL_bands(2)-COLL_bands(1)+1 + YAMBO_ALLOC(disk_data,(COLL_nbands,q%nbz,2)) + ! + if (read_is_on(ID)) then + if(.not.allocated(OSCLL)) then + YAMBO_ALLOC(OSCLL,(COLL_bands(1):COLL_bands(2),COLL_bands(1):COLL_bands(2),PAR_Xk_nibz,q%nbz,ng_oscll)) + endif + endif + ! + ! Fragmentation + ! + call io_fragment(ID,ID_frag,i_fragment=i_fragment) + ! + ! Manage RD_CL_IF_END + ! + if (io_action(ID)==RD_CL_IF_END.and.ik==nXkibz) io_action(ID)=RD_CL + ! + do ib=COLL_bands(1),COLL_bands(2) + do ig=1,ng_oscll + ! + if (write_is_on(ID)) call mat_c2r(OSCLL_k(ib,:,:,ig),disk_data) + ! + write (VAR_name,'(3(a,i4.4))') 'OSCLL_',ik,'_ib_',ib,'_ig_',ig + call def_variable_bulk(ID_frag,trim(VAR_name),1,shape(disk_data),SP) + call io_variable_bulk(ID_frag,1,R3=disk_data) + ! + io_OSCLL=io_status(ID) + if (io_OSCLL/=0) goto 1 + ! + if (read_is_on(ID)) call mat_r2c(disk_data,OSCLL(ib,:,ik_mem,:,ig)) + ! + enddo + enddo + ! + YAMBO_FREE(disk_data) + ! +1 call io_fragment_disconnect(ID,ID_frag) + ! + call io_disconnect(ID=ID) + ! +#if defined _TIMING + call timing('io_OSCLL',OPR='stop') +#endif + ! +end function diff --git a/src/io/io_X.F b/src/io/io_X.F index d562fce064..c91c207b72 100644 --- a/src/io/io_X.F +++ b/src/io/io_X.F @@ -227,6 +227,7 @@ integer function io_X(X,Xw,ID) endif ! 1 call io_fragment_disconnect(ID,ID_frag) + ! ! call io_disconnect(ID) ! diff --git a/src/io/variables_NL.F b/src/io/variables_NL.F index ea1d562f7f..2721376091 100644 --- a/src/io/variables_NL.F +++ b/src/io/variables_NL.F @@ -11,6 +11,7 @@ integer function variables_NL(ID) use units, ONLY:AUT2FS, HA2EV, AU2KWCMm2 use nl_optics, ONLY:NL_damping,l_use_DIPOLES,NL_correlation,NL_er,NL_correlation,n_frequencies, & & NL_desc,NL_rot_axis,n_angles,NL_initial_versor + use collision_ext, ONLY:ng_oscll use QP_m, ONLY:QP_ng_SH,QP_ng_Sx use real_time, ONLY:Integrator_name,NE_steps,RT_step,l_RT_induced_field,RT_bands,RAD_LifeTime use electric, ONLY:l_force_SndOrd @@ -86,10 +87,14 @@ integer function variables_NL(ID) call def_variable_elemental(ID,"QP_ng_Sx",1,IP_YIO,0) call io_variable_elemental(ID,VAR="[NL] Number of G in EXCHANGE(DFT)",I0=QP_ng_Sx,CHECK=.true.,OP=(/"=="/), & & DESCRIPTOR=NL_desc) - + call def_variable_elemental(ID,"ng_OSCLL",1,IP_YIO,0) + call io_variable_elemental(ID,VAR="[NL] Number of G in LSEX",I0=ng_oscll,CHECK=.true.,OP=(/"=="/), & + & DESCRIPTOR=NL_desc) call def_variable_elemental(ID,"RAD_LifeTime",1,SP,0) call io_variable_elemental(ID,VAR="[NL] Radiative Life-Time",R0=RAD_LifeTime,CHECK=.true.,OP=(/"=="/),DESCRIPTOR=NL_desc) ! + ! INTEGRATOR + ! call def_variable_elemental(ID,"Integrator",1,CR_YIO,0) call io_variable_elemental(ID,CH0=Integrator_name,VAR=' [RT] Integrator ',CHECK=.true.,OP=(/"=="/), & & DESCRIPTOR=NL_desc) diff --git a/src/io_parallel/io_BS_PAR_free.F b/src/io_parallel/io_BS_PAR_free.F index c1e55aaf22..bc89103e75 100644 --- a/src/io_parallel/io_BS_PAR_free.F +++ b/src/io_parallel/io_BS_PAR_free.F @@ -10,6 +10,7 @@ subroutine io_BS_PAR_free(ID_head,ID,mode,l_kernel_complete) ! close BSE netcdf parallel file ! use pars, ONLY:SP + use parallel_m, ONLY:master_cpu use parallel_int, ONLY:PP_wait use BS, ONLY:l_BSE_kernel_complete,BS_K_has_been_calculated_loaded,BS_K_cutoff use IO_m, ONLY:read_is_on,write_is_on,io_BS_K @@ -31,7 +32,7 @@ subroutine io_BS_PAR_free(ID_head,ID,mode,l_kernel_complete) BS_K_has_been_calculated_loaded=l_kernel_complete l_BSE_kernel_complete =l_kernel_complete.and.(io_BS_K.or.BS_K_cutoff>0._SP) endif - if(write_is_on(ID_head).or.read_is_on(ID_head)) then + if((write_is_on(ID_head).and.master_cpu).or.read_is_on(ID_head)) then io_BS=variables_BS_more(ID_head,'end') call io_disconnect(ID_head) endif diff --git a/src/io_parallel/io_BS_header.F b/src/io_parallel/io_BS_header.F index 1af17823f2..b1d97b6782 100644 --- a/src/io_parallel/io_BS_header.F +++ b/src/io_parallel/io_BS_header.F @@ -14,6 +14,7 @@ integer function io_BS_header(iq,X,ID,mode) use pars, ONLY:schlen,lchlen use stderr, ONLY:intc use X_m, ONLY:X_t + use parallel_m, ONLY:master_cpu use BS, ONLY:BS_K_cutoff_done,l_BSE_restart,l_BSE_kernel_complete use BS_solvers, ONLY:variables_BS,BSS_desc use IO_m, ONLY:frag_BS_K,write_is_on,read_is_on,io_mode,io_BS_K,DUMP @@ -57,6 +58,8 @@ integer function io_BS_header(iq,X,ID,mode) ! endif ! + if(.not. (read_is_on(ID) .or. (write_is_on(ID).and.master_cpu)) ) return + ! db_name='BS_head_Q'//trim(intc(iq)) ! io_BS_header=io_connect(desc=trim(db_name),type=2,ID=ID) @@ -64,10 +67,6 @@ integer function io_BS_header(iq,X,ID,mode) if (trim(mode)=="connect") return ! ! Check if the file already contains data and it is consistent - ! DS: warning. This is potentially dangerous since all MPI tasts are all writing - ! both the header and the variables - ! I cannot put here if(master_cpu) because I need all MPI_tasks to - ! go through the definition of the variables (see similar issue in io_X.F) ! io_BS_header=io_header(ID,QPTS=.true.,R_LATT=.true.,WF=.true.,& & IMPOSE_SN=.true.,XC_KIND="K_WF force Xs",CUTOFF=.true.,FRAG=frag_BS_K) diff --git a/src/linear_algebra/MATRIX_slepc.F b/src/linear_algebra/MATRIX_slepc.F index 7335cb4629..e61ba3f0fe 100644 --- a/src/linear_algebra/MATRIX_slepc.F +++ b/src/linear_algebra/MATRIX_slepc.F @@ -27,7 +27,7 @@ subroutine MATRIX_slepc(M_slepc,l_target_energy,n_eig,V_right,V_left,E_real,E_cm use stderr, ONLY : intc use BS_solvers, ONLY : BSS_slepc_extraction,BSS_slepc_ncv,BSS_slepc_maxit,& & BSS_slepc_tol,BSS_slepc_target_E,BSS_slepc_precondition,& - & BSS_slepc_approach,BSS_slepc_mpd,BSS_slepc_matrix_format + & BSS_slepc_approach,BSS_slepc_matrix_format,BSS_slepc_mpd ! use petscsys use petscmat diff --git a/src/modules/SET_defaults.F b/src/modules/SET_defaults.F index 204ecc41fc..77191bc0da 100644 --- a/src/modules/SET_defaults.F +++ b/src/modules/SET_defaults.F @@ -47,14 +47,14 @@ subroutine SET_defaults(INSTR,IND,OD,COM_DIR) use wave_func, ONLY:wf_ng,wf_norm_test,wf_nb_io,wf_nb_io_groups,WF,WF_buffer,WF_buffered_IO use FFT_m, ONLY:fft_dim_loaded,fft_size,fft_dim,fft_multiplier use IO_m, ONLY:io_reset,max_io_units,serial_number,frag_WF - use BS_solvers, ONLY:BSS_mode,BSS_n_freqs,BSS_er,BSS_dr,& -& BSS_q0,Haydock_threshold,Haydock_iterIO,Haydock_iterMAX,BSS_uses_DbGd,BSS_Wd,& + use BS_solvers, ONLY:BSS_mode,BSS_n_freqs,BSS_er,BSS_dr,BSS_P_dir,BSS_E_dir,BSS_Q_dir,& +& Haydock_threshold,Haydock_iterIO,Haydock_iterMAX,BSS_uses_DbGd,BSS_Wd,& & BSS_damp_reference,BSS_Vnl_included,BSS_uses_GreenF,BSS_inversion_mode,& & BSS_perturbative_width,K_INV_EPS,K_INV_PL,K_INV_PI_PH,BSS_desc use descriptors,ONLY:IO_desc_reset #if defined _SLEPC && !defined _NL use BS_solvers, ONLY:BSS_slepc_extraction,BSS_slepc_ncv,BSS_slepc_tol,BSS_slepc_target_E,BSS_slepc_maxit,& - & BSS_slepc_precondition,BSS_slepc_approach,BSS_slepc_mpd,BSS_slepc_matrix_format + & BSS_slepc_precondition,BSS_slepc_approach,BSS_slepc_matrix_format,BSS_slepc_mpd #endif use BS, ONLY:BS_n_g_W,BS_eh_en,BS_identifier,BS_q,BS_eh_win,MAX_BSK_LIN_size,& & BS_K_dim,BS_not_const_eh_f,BSK_mode,l_BSE_kernel_complete,& @@ -449,7 +449,9 @@ subroutine SET_defaults(INSTR,IND,OD,COM_DIR) BSS_inversion_mode='pf' BSS_er=(/0._SP,10._SP/)/HA2EV BSS_dr=0.1_SP/HA2EV - BSS_q0=(/1._SP,0._SP,0._SP/) + BSS_E_dir=(/1._SP,0._SP,0._SP/) + BSS_P_dir=(/0._SP,1._SP,0._SP/) + BSS_Q_dir=(/0._SP,0._SP,1._SP/) BSS_uses_DbGd=.FALSE. BSS_damp_reference=0._SP BS_K_is_ALDA=.FALSE. diff --git a/src/modules/mod_BS.F b/src/modules/mod_BS.F index e6e82f929d..2812f48647 100644 --- a/src/modules/mod_BS.F +++ b/src/modules/mod_BS.F @@ -48,11 +48,13 @@ module BS logical :: l_BS_jdos logical :: l_BS_abs logical :: l_BS_kerr + logical :: l_BS_kerr_asymm logical :: l_BS_photolum logical :: l_BS_magnons logical :: l_BS_dichroism logical :: l_BS_optics logical :: l_BS_esort + logical :: l_BS_esort_indx ! ! Dimensions !============= @@ -278,69 +280,9 @@ integer function T_group(I) if (I>=BS_T_grp_1st_el(BS_nT_grps-1)) T_group=BS_nT_grps end function ! - subroutine BS_oscillators_alloc(EXCHANGE,iG,CORRELATION,iB) - ! - use pars, ONLY:IP - use stderr, ONLY:intc - ! - logical, intent(in) :: EXCHANGE,CORRELATION - integer, intent(in) :: iG,iB - ! - if (EXCHANGE) then - if (BS_T_grp(iG)%size==0) return - YAMBO_ALLOC(BS_T_grp(iG)%O_x,(BS_n_g_exch,BS_T_grp(iG)%size)) -#ifdef _CUDA - allocate(BS_T_grp(iG)%O_x_d,mold=BS_T_grp(iG)%O_x) -#endif - - endif - ! - if (CORRELATION) then - if (BS_blk(iB)%N_oscillators==0) return - YAMBO_ALLOC(BS_blk(iB)%O_c,(O_ng,BS_blk(iB)%N_oscillators)) -#ifdef _CUDA - allocate(BS_blk(iB)%O_c_d,mold=BS_blk(iB)%O_c) -#endif - endif - ! - end subroutine - ! - subroutine BS_oscillators_free(iG_ref,iB_ref) - use stderr, ONLY:intc - integer, intent(in) :: iG_ref,iB_ref - integer :: iG,iB,ik_loop,ip_loop,ik_now,ip_now - do iG=iG_ref,1,-1 - YAMBO_FREE(BS_T_grp(iG)%O_x) -#ifdef _CUDA - if (allocated(BS_T_grp(iG)%O_x_d)) deallocate(BS_T_grp(iG)%O_x_d) -#endif - enddo - if(iB_ref==0) return - ik_now=BS_blk(iB_ref)%ik - ip_now=BS_blk(iB_ref)%ip - if(iB_ref==n_BS_blks) then - ik_now=0 - ip_now=0 - endif - do iB=iB_ref,1,-1 - ik_loop=BS_blk(iB)%ik - ip_loop=BS_blk(iB)%ip - if ( ik_now==ik_loop .and. ip_now==ip_loop .and. .not.l_BSE_minimize_memory ) cycle - if (.not.allocated(BS_blk(iB)%O_c)) exit - YAMBO_FREE(BS_blk(iB)%O_c) - YAMBO_FREE(BS_blk(iB)%O_table) - YAMBO_FREE(BS_blk(iB)%kp_table) -#ifdef _CUDA - if (allocated(BS_blk(iB)%O_c_d)) deallocate(BS_blk(iB)%O_c_d) -#endif - enddo - ! - end subroutine - ! subroutine BS_Blocks_and_Transitions_alloc(E,iT,dom_k_T_group) ! use pars, ONLY:IP,cZERO - use stderr, ONLY:intc use electrons, ONLY:levels,n_sp_pol integer, intent(in) :: iT,dom_k_T_group type(levels), intent(in) :: E @@ -388,7 +330,6 @@ subroutine BS_blks_free(i_BS_mat) end subroutine ! subroutine BS_Blocks_and_Transitions_free() - use stderr, ONLY:intc use parallel_m, ONLY:PAR_BS_T_grps_index integer :: iT ! @@ -423,7 +364,7 @@ subroutine BS_Blocks_and_Transitions_free() ! if(allocated(BS_blk)) deallocate(BS_blk) ! - end subroutine + end subroutine BS_Blocks_and_Transitions_free ! function BS_Block_size(i_block) ! diff --git a/src/modules/mod_BS_solvers.F b/src/modules/mod_BS_solvers.F index 9e1a973f63..306f3ea043 100644 --- a/src/modules/mod_BS_solvers.F +++ b/src/modules/mod_BS_solvers.F @@ -59,6 +59,9 @@ module BS_solvers real(SP) :: BSS_dr(2) real(SP) :: BSS_damp_reference real(SP) :: BSS_q0(3) + real(SP) :: BSS_E_dir(3) ! Electric field + real(SP) :: BSS_P_dir(3) ! Induced polarization + real(SP) :: BSS_Q_dir(3) ! Field propagation direction logical :: BSS_uses_DbGd logical :: BSS_Vnl_included logical :: BSS_uses_GreenF @@ -152,7 +155,7 @@ integer function variables_BS(ID,iq,local_desc,CLOSE_the_menu,X) end function ! integer function io_BSS_diago(iq,i_BS_mat,ID,X_static,bsE,bsRl,BsRr,BsE_corr,& - & bsL_magn,bsR_magn,bsR_kerr,bsR_pl) + & bsL_magn,bsR_magn,bsL_kerr,bsR_kerr,bsR_dich,bsR_pl) use pars, ONLY:SP use X_m, ONLY:X_t implicit none @@ -160,7 +163,7 @@ integer function io_BSS_diago(iq,i_BS_mat,ID,X_static,bsE,bsRl,BsRr,BsE_corr,& integer :: iq,ID,i_BS_mat complex(SP) ,optional :: bsE(:) real(SP), pointer,optional :: BsE_corr(:,:),bsR_pl(:,:) - complex(SP),pointer,optional :: bsR_kerr(:),bsL_magn(:,:),bsR_magn(:,:),bsRl(:),bsRr(:) + complex(SP),pointer,optional :: bsL_kerr(:),bsR_kerr(:),bsR_dich(:,:),bsL_magn(:,:),bsR_magn(:,:),bsRl(:),bsRr(:) end function ! integer function io_BSS_Haydock(ID,iq,it,reached_treshold,mode,Af,Bf,Cf,Vnm1,Vn,Vnp1) diff --git a/src/modules/mod_R_lattice.F b/src/modules/mod_R_lattice.F index 4b3d23e89a..a9e9d8db86 100644 --- a/src/modules/mod_R_lattice.F +++ b/src/modules/mod_R_lattice.F @@ -121,6 +121,7 @@ module R_lattice integer :: n_g_shells ! Number of G-shells integer :: ng_vec ! Number of G-vectors integer :: ng_closed ! Number of G closed + integer :: G_m_G_maxval ! Maximum value of G_m_G integer ,allocatable :: ng_in_shell(:) ! Number of G in each shell integer ,allocatable :: g_rot(:,:) integer ,allocatable :: G_m_G(:,:) diff --git a/src/modules/mod_X.F b/src/modules/mod_X.F index f6a116dba2..74e771a73e 100644 --- a/src/modules/mod_X.F +++ b/src/modules/mod_X.F @@ -65,6 +65,7 @@ module X_m ! Absorption & Polarizability ! integer :: N_BS_E_sorted=0 + integer, allocatable :: BS_E_sorted_indx(:,:,:) complex(SP), allocatable :: BS_E_sorted(:,:,:) complex(SP), allocatable :: Resp_ii(:,:) complex(SP), allocatable :: Resp_ij(:,:) @@ -160,7 +161,7 @@ module X_m integer :: N_messages character(schlen) :: messages(N_MAX_columns) end type - integer, parameter :: N_X_obs=15 + integer, parameter :: N_X_obs=16 type(X_obs_t) :: X_obs(N_X_obs) ! interface @@ -176,7 +177,7 @@ subroutine X_OUTPUT_messages(iq,ig,Vnl,GF,ordering,Q_plus_G,MORE) character(*), optional :: MORE end subroutine ! - subroutine X_OUTPUT_driver(WHAT,keys,solver,IQ,IG,DESC,data) + subroutine X_OUTPUT_driver(WHAT,keys,solver,IQ,IG,DESC,data,indexes) use pars, ONLY:SP use frequency, ONLY:w_samp use descriptors, ONLY:IO_desc @@ -186,6 +187,7 @@ subroutine X_OUTPUT_driver(WHAT,keys,solver,IQ,IG,DESC,data) integer, optional:: IQ,IG type(IO_desc),optional:: DESC real(SP), optional:: data(:) + integer, optional:: indexes(:) end subroutine ! subroutine X_Double_Grid_setup(iq,k,k_FGbz,nTrans_percent,Radius) diff --git a/src/modules/mod_collision_ext.F b/src/modules/mod_collision_ext.F index 79558a71df..0934280332 100644 --- a/src/modules/mod_collision_ext.F +++ b/src/modules/mod_collision_ext.F @@ -92,6 +92,13 @@ module collision_ext type(COLLISIONS_element), allocatable :: HXC_COLL_element(:) type(COLLISIONS_element), allocatable :: P_COLL_element(:) ! + ! On the fly long-range SEX (LSEX) collisions + ! ==================================== + integer :: ng_oscll ! number of G-vectors in the Oscillators + complex(SP), allocatable :: W(:,:,:) ! screened interaction + complex(SP), allocatable :: OSCLL(:,:,:,:,:) ! Oscillators + complex(SP), allocatable :: OSCLL_k(:,:,:,:) ! Oscillators in k, just for the evaluation + ! contains ! subroutine COLLISIONS_naming( H_SE_kind , what ) diff --git a/src/modules/mod_global_XC.F b/src/modules/mod_global_XC.F index afde58dd0c..8bb40f39e2 100644 --- a/src/modules/mod_global_XC.F +++ b/src/modules/mod_global_XC.F @@ -101,6 +101,8 @@ module global_XC H_SE_SRPA_SLT = 325, & ! OEP-COHSEX (SLT apprx) H_SE_COH = 326, & ! Coulomb-Hole H_SE_SEX = 327, & ! Screened-eXchange + H_SE_LSEX = 328, & ! Long range-screened-eXchange + H_SE_LHF = 329, & ! Long range-eXchange H_SE_COHSEX = H_SE_SEX+H_SE_COH ! Coulomb-Hole + Screened-eXchange ! ! Phonons diff --git a/src/modules/mod_interfaces.F b/src/modules/mod_interfaces.F index 6c240a30f9..131d6c7d58 100644 --- a/src/modules/mod_interfaces.F +++ b/src/modules/mod_interfaces.F @@ -171,8 +171,8 @@ integer function eval_G_minus_G(iG,iGo,force_recompute,COMM) type(yMPI_comm), optional :: COMM end function ! - subroutine WF_load(WF,iG_max,iGo_max,bands_to_load,kpts_to_load,& -& sp_pol_to_load,space,title,impose_free_and_alloc,& + subroutine WF_load(WF,iG_max,iGo_max,bands_to_load,kpts_to_load,sp_pol_to_load,& +& k_extrema_only,space,title,impose_free_and_alloc,& & force_WFo,keep_states_to_load,quiet) use wave_func, ONLY:WAVEs integer :: iG_max,iGo_max,bands_to_load(2),kpts_to_load(2) @@ -180,16 +180,18 @@ subroutine WF_load(WF,iG_max,iGo_max,bands_to_load,kpts_to_load,& integer, optional :: sp_pol_to_load(2) character(*),optional :: space character(*),optional :: title + logical ,optional :: k_extrema_only logical ,optional :: impose_free_and_alloc logical ,optional :: force_WFo logical ,optional :: keep_states_to_load logical, optional, intent(in) :: quiet end subroutine ! - subroutine WF_free(WF,keep_fft) + subroutine WF_free(WF,keep_fft,keep_states_to_load) use wave_func, ONLY:WAVEs type(WAVEs) :: WF logical ,optional :: keep_fft + logical ,optional :: keep_states_to_load end subroutine ! subroutine OCCUPATIONS_Gaps(E,E_g_dir,E_g_ind,N_f,N_m,I_dir,E_k_dir,E_k_ind) @@ -205,21 +207,44 @@ subroutine OCCUPATIONS_Gaps(E,E_g_dir,E_g_ind,N_f,N_m,I_dir,E_k_dir,E_k_ind) integer , optional :: E_k_ind(n_sp_pol,2) ! k->k' of the indirect gap end subroutine ! - subroutine K_diago_response_functions(iq,i_BS_mat,W,BS_E,BS_R_left,BS_R_right,BS_E_SOC_corr,& - & BS_R_left_magn,BS_R_right_magn,BS_R_right_kerr,BS_R_PL) + subroutine K_diago_response_functions(iq,i_BS_mat,W,BS_E,BS_R_left_abs,BS_R_right_abs,BS_E_SOC_corr,& + & BS_R_left_magn,BS_R_right_magn,BS_R_left_kerr,BS_R_right_kerr,BS_R_right_dich,BS_R_PL) use BS_solvers, ONLY:BSS_n_eig use pars, ONLY:SP use frequency, ONLY:w_samp type(w_samp),intent(in) :: W integer, intent(in) :: iq,i_BS_mat complex(SP), intent(in) :: BS_E(BSS_n_eig) - complex(SP), pointer, intent(in) :: BS_R_left(:),BS_R_right(:) + complex(SP), pointer, intent(in) :: BS_R_left_abs(:),BS_R_right_abs(:) real(SP), pointer, intent(in) :: BS_E_SOC_corr(:,:) complex(SP), pointer, intent(in) :: BS_R_left_magn(:,:),BS_R_right_magn(:,:) - complex(SP), pointer, intent(in) :: BS_R_right_kerr(:) + complex(SP), pointer, intent(in) :: BS_R_left_kerr(:),BS_R_right_kerr(:),BS_R_right_dich(:,:) real(SP), pointer, intent(in) :: BS_R_PL(:,:) end subroutine ! + subroutine K_diago_right_residuals(mode,i_BS_mat,BS_E,BS_R_right,BS_V_right) + use BS_solvers, ONLY:BSS_n_eig + use pars, ONLY:SP + implicit none + character(*),intent(in) :: mode + integer, intent(in) :: i_BS_mat + complex(SP), intent(in) :: BS_E(BSS_n_eig) + complex(SP), intent(out) :: BS_R_right(BSS_n_eig) + complex(SP), intent(in) :: BS_V_right(:,:) + end subroutine K_diago_right_residuals + ! + subroutine K_diago_left_residuals(mode,i_BS_mat,BS_E,BS_R_left,BS_V_left,BS_overlap) + use BS_solvers, ONLY:BSS_n_eig + use pars, ONLY:SP + implicit none + character(*),intent(in) :: mode + integer, intent(in) :: i_BS_mat + complex(SP), intent(in) :: BS_E(BSS_n_eig) + complex(SP),target, intent(out) :: BS_R_left(BSS_n_eig) + complex(SP), intent(in) :: BS_V_left(:,:) + complex(SP),optional,intent(in) :: BS_overlap(BSS_n_eig,BSS_n_eig) + end subroutine K_diago_left_residuals + ! subroutine PL_diago_residual(BS_V_left,BS_V_right,BS_R_PL,K_is_not_hermitian,BS_overlap) use pars, ONLY:SP use BS, ONLY:BS_H_dim diff --git a/src/modules/mod_nl_optics.F b/src/modules/mod_nl_optics.F index fe637aa313..70b7254d0c 100644 --- a/src/modules/mod_nl_optics.F +++ b/src/modules/mod_nl_optics.F @@ -64,6 +64,9 @@ module nl_optics ! Density matrix complex(SP), allocatable :: dG(:,:,:) ! + ! Sigma sex at equilibrium (for oscillators) + complex(SP), allocatable :: Sigma_SEX_EQ(:,:,:,:) + ! ! Integrator Variables ! integer, parameter :: EULER = 1 @@ -99,8 +102,11 @@ module nl_optics integer, parameter :: LRCW = 6 integer, parameter :: HF = 7 integer, parameter :: SEX = 8 + integer, parameter :: LSEX = 9 + integer, parameter :: LHF = 10 ! logical :: eval_COLLISIONS + logical :: eval_OSCLL logical :: eval_dG ! ! Non-linear spectra parameters @@ -141,6 +147,7 @@ module nl_optics ! logical :: l_use_DIPOLES ! Use standard dipoles (valid only for linear response) logical :: l_eval_CURRENT ! Evaluate current using the commutator v=[H,r] and the IP formulation + logical :: l_test_OSCLL ! Test collisions built using OSCLLs vs standard Collisions ! ! IO variables ! @@ -182,13 +189,14 @@ subroutine NL_alloc(en) YAMBO_ALLOC(V_xc_0,(fft_size,n_spin)) endif ! - if(eval_dG) then - YAMBO_ALLOC(dG,(NL_bands(1):NL_bands(2),NL_bands(1):NL_bands(2),QP_nk)) - endif if(l_use_Hxc_collisions) then YAMBO_ALLOC(RT_Vnl_xc,(NL_bands(1):NL_bands(2),NL_bands(1):NL_bands(2),QP_nk,n_sp_pol)) endif ! + if(eval_dG) then + YAMBO_ALLOC(dG,(NL_bands(1):NL_bands(2),NL_bands(1):NL_bands(2),QP_nk)) + endif + ! ! Reference energies ! YAMBO_ALLOC(E_full,(1:NL_bands(2),QP_nk,n_sp_pol)) @@ -227,6 +235,10 @@ subroutine NL_alloc_k_distributed(en) YAMBO_ALLOC(I_relax,(NL_bands(1):NL_bands(2),NL_bands(1):NL_bands(2),maxval(en%nbf),PAR_Xk_nibz,n_sp_pol)) YAMBO_ALLOC(Ho_plus_Sigma,(NL_bands(1):NL_bands(2),NL_bands(1):NL_bands(2),PAR_Xk_nibz,n_sp_pol)) ! + if(Correlation==LSEX.or.Correlation==LHF) then + YAMBO_ALLOC(Sigma_SEX_EQ,(NL_bands(1):NL_bands(2),NL_bands(1):NL_bands(2),n_sp_pol,PAR_Xk_nibz)) + endif + ! end subroutine NL_alloc_k_distributed ! subroutine NL_free @@ -267,6 +279,9 @@ subroutine NL_free if(l_use_Hxc_collisions) then YAMBO_FREE(RT_Vnl_xc) endif + if(eval_dG) then + YAMBO_FREE(dG) + endif ! end subroutine NL_free ! diff --git a/src/modules/mod_openmp.F b/src/modules/mod_openmp.F index 408ce42556..8fcb58f977 100644 --- a/src/modules/mod_openmp.F +++ b/src/modules/mod_openmp.F @@ -28,6 +28,7 @@ module openmp integer :: n_threads_K = 0 integer :: n_threads_NL = 0 integer :: n_threads_BZINDX= 0 + integer :: n_threads_OSCLL = 0 integer :: n_threads_now = 0 logical :: master_thread = .TRUE. ! @@ -67,14 +68,15 @@ subroutine OPENMP_initialize( ) ! n_threads = 1 n_threads_limit = 1 - n_threads_now = 1 - n_threads_FFT = 0 - n_threads_X = 1 - n_threads_SE = 1 - n_threads_RT = 1 - n_threads_K = 1 - n_threads_DIP = 1 - n_threads_NL = 1 + n_threads_now = 1 + n_threads_FFT = 0 + n_threads_X = 1 + n_threads_SE = 1 + n_threads_RT = 1 + n_threads_K = 1 + n_threads_DIP = 1 + n_threads_NL = 1 + n_threads_OSCLL= 1 ! n_out_threads = 1 n_inn_threads = 1 diff --git a/src/modules/mod_parallel.F b/src/modules/mod_parallel.F index 0b85f48b88..70d78fca48 100644 --- a/src/modules/mod_parallel.F +++ b/src/modules/mod_parallel.F @@ -75,7 +75,8 @@ module parallel_m ! ! Logicals ! - logical :: l_par_X_T,l_par_X_G,l_par_X_G_finite_q,l_par_DIP,l_par_BZINDX,l_par_SE,l_par_RT,l_par_SC,l_par_NL + logical :: l_par_X_T,l_par_X_G,l_par_X_G_finite_q,l_par_DIP,l_par_BZINDX,l_par_SE, & +& l_par_RT,l_par_SC,l_par_NL,l_par_OSCLL ! ! In a parallel runs, when some loops not involving all CPU's are parallelized it is usefull to keep ! the operations local in the cpu world @@ -540,7 +541,7 @@ integer function i_INTER_CHAIN(N_father,N_child) character(lchlen) function PARALLEL_message(i_s) use stderr, ONLY:intc use openmp, ONLY:n_threads_X,n_threads_SE,n_threads_RT,n_threads_DIP,n_threads_K, & -& n_threads_NL,n_threads_BZINDX,n_threads +& n_threads_NL,n_threads_BZINDX,n_threads_OSCLL,n_threads integer :: i_s ! PARALLEL_message=" " @@ -562,6 +563,7 @@ character(lchlen) function PARALLEL_message(i_s) if (n_threads_RT >0) PARALLEL_message=trim(PARALLEL_message)//"-"//trim(intc(n_threads_RT))//"(threads@RT)" if (n_threads_K >0) PARALLEL_message=trim(PARALLEL_message)//"-"//trim(intc(n_threads_K))//"(threads@K)" if (n_threads_NL >0) PARALLEL_message=trim(PARALLEL_message)//"-"//trim(intc(n_threads_NL))//"(threads@NL)" + if (n_threads_OSCLL >0) PARALLEL_message=trim(PARALLEL_message)//"-"//trim(intc(n_threads_OSCLL))//"(threads@OSCLL)" else PARALLEL_message=trim(CPU_structure(i_s)%Short_Description)//"(environment)-"//& & trim(CPU_structure(i_s)%CPU_string)//"(CPUs)-"//& @@ -608,6 +610,8 @@ subroutine CPU_str_reset() CPU_structure(9)%Short_Description="BZINDX" CPU_structure(10)%Long_Description="Phonon_Self_Energy" CPU_structure(10)%Short_Description="PH_SE" + CPU_structure(11)%Long_Description="Oscillators" + CPU_structure(11)%Short_Description="OSCLL" end subroutine ! subroutine COMM_reset(COMM) diff --git a/src/modules/mod_wrapper.F b/src/modules/mod_wrapper.F index 939cf485fa..60997b16db 100644 --- a/src/modules/mod_wrapper.F +++ b/src/modules/mod_wrapper.F @@ -13,7 +13,12 @@ module wrapper ! TRANSA = 'T' or 't', op( A ) = A'. ! TRANSA = 'C' or 'c', op( A ) = conjg( A' ). ! - use pars, ONLY:SP,cI + use pars, ONLY:SP,cI + ! + ! Implementation based on C_F_POINTER assumes that variables are contiguous, + ! which may not be the case + ! + use iso_c_binding, ONLY:C_LOC,C_F_POINTER #ifdef _CUDA use cublas #endif @@ -54,9 +59,9 @@ module wrapper end interface ! interface Vstar_dot_V - module procedure Vstar_dot_V_c1_cpu, Vstar_dot_V_c2_cpu + module procedure Vstar_dot_V_c1_cpu,Vstar_dot_V_c2_cpu,V_dot_V_r1_cpu,V_dot_V_r2_cpu #ifdef _CUDA - module procedure Vstar_dot_V_c1_gpu, Vstar_dot_V_c2_gpu + module procedure Vstar_dot_V_c1_gpu,Vstar_dot_V_c2_gpu,V_dot_V_r1_gpu,V_dot_V_r2_gpu #endif end interface ! @@ -68,9 +73,9 @@ module wrapper end interface ! interface V_dot_V - module procedure V_dot_V_r1_cpu, V_dot_V_c1_cpu, V_dot_V_c2_cpu + module procedure V_dot_V_r1_cpu,V_dot_V_r2_cpu,V_dot_V_c1_cpu,V_dot_V_c2_cpu #ifdef _CUDA - module procedure V_dot_V_c1_gpu + module procedure V_dot_V_r1_gpu,V_dot_V_r2_gpu,V_dot_V_c1_gpu,V_dot_V_c2_gpu #endif end interface ! @@ -356,7 +361,7 @@ end subroutine V_copy complex(SP) function Vstar_dot_V_c1_cpu(N,CX,CY) implicit none integer, intent(in) :: N - complex(SP),intent(in) :: CX(*),CY(*) + complex(SP),intent(in) :: CX(:),CY(:) #if defined _DOUBLE complex(SP)::zdotc Vstar_dot_V_c1_cpu=ZDOTC(N,CX,1,CY,1) @@ -369,7 +374,7 @@ end function Vstar_dot_V_c1_cpu complex(SP) function Vstar_dot_V_c2_cpu(N,CX,CY) implicit none integer, intent(in) :: N - complex(SP),intent(in) :: CX(:,:),CY(:,:) + complex(SP),target,contiguous,intent(in) :: CX(:,:),CY(:,:) #if defined _DOUBLE complex(SP)::zdotc Vstar_dot_V_c2_cpu=ZDOTC(N,CX,1,CY,1) @@ -383,7 +388,7 @@ end function Vstar_dot_V_c2_cpu complex(SP) function Vstar_dot_V_c1_gpu(N,CX,CY) implicit none integer, intent(in) :: N - complex(SP), device, intent(in) :: CX(*),CY(*) + complex(SP), device, intent(in) :: CX(:),CY(:) ! #if defined _DOUBLE Vstar_dot_V_c1_gpu=cublasZdotc(N,CX,1,CY,1) @@ -395,7 +400,10 @@ end function Vstar_dot_V_c1_gpu complex(SP) function Vstar_dot_V_c2_gpu(N,CX,CY) implicit none integer, intent(in) :: N - complex(SP), device, intent(in) :: CX(:,:),CY(:,:) + complex(SP), device, target, intent(in) :: CX(:,:),CY(:,:) + !real(SP),pointer :: CX1D(:),CY1D(:) + !call C_F_POINTER(C_LOC(CX),CX1D,(/N/)) + !call C_F_POINTER(C_LOC(CY),CY1D,(/N/)) ! #if defined _DOUBLE Vstar_dot_V_c2_gpu=cublasZdotc(N,CX,1,CY,1) @@ -403,7 +411,6 @@ complex(SP) function Vstar_dot_V_c2_gpu(N,CX,CY) Vstar_dot_V_c2_gpu=cublasCdotc(N,CX,1,CY,1) #endif end function Vstar_dot_V_c2_gpu - ! #endif ! !============== @@ -445,7 +452,7 @@ end function Vstar_dot_VV_c1_gpu real(SP) function V_dot_V_r1_cpu(N,CX,CY) implicit none integer, intent(in) :: N - real(SP),intent(in) :: CX(*),CY(*) + real(SP),intent(in) :: CX(:),CY(:) #if defined _DOUBLE real(SP)::ddot V_dot_V_r1_cpu=DDOT(N,CX,1,CY,1) @@ -454,11 +461,64 @@ real(SP) function V_dot_V_r1_cpu(N,CX,CY) V_dot_V_r1_cpu=SDOT(N,CX,1,CY,1) #endif end function V_dot_V_r1_cpu + ! + real(SP) function V_dot_V_r2_cpu(N,CX,CY) + implicit none + integer, intent(in) :: N + real(SP),target,intent(in) :: CX(:,:),CY(:,:) +#if defined _DOUBLE + real(SP)::ddot +#else + real(SP)::sdot +#endif + !real(SP),pointer :: CX1D(:),CY1D(:) + !call C_F_POINTER(C_LOC(CX),CX1D,(/N/)) + !call C_F_POINTER(C_LOC(CY),CY1D,(/N/)) +#if defined _DOUBLE + V_dot_V_r2_cpu=DDOT(N,CX,1,CY,1) +#else + V_dot_V_r2_cpu=SDOT(N,CX,1,CY,1) +#endif + end function V_dot_V_r2_cpu + ! +#ifdef _CUDA + real(SP) function V_dot_V_r1_gpu(N,CX,CY) + implicit none + integer, intent(in) :: N + real(SP), device, intent(in) :: CX(:),CY(:) +#if defined _DOUBLE + real(SP)::ddot + V_dot_V_r1_gpu=cublasDDOT(N,CX,1,CY,1) +#else + real(SP)::sdot + V_dot_V_r1_gpu=cublasSDOT(N,CX,1,CY,1) +#endif + end function V_dot_V_r1_gpu + ! + real(SP) function V_dot_V_r2_gpu(N,CX,CY) + implicit none + integer, intent(in) :: N + real(SP), device, target, intent(in) :: CX(:,:),CY(:,:) +#if defined _DOUBLE + real(SP)::ddot +#else + real(SP)::sdot +#endif + !real(SP),pointer :: CX1D(:),CY1D(:) + !call C_F_POINTER(C_LOC(CX),CX1D,(/N/)) + !call C_F_POINTER(C_LOC(CY),CY1D,(/N/)) +#if defined _DOUBLE + V_dot_V_r2_gpu=cublasDDOT(N,CX,1,CY,1) +#else + V_dot_V_r2_gpu=cublasSDOT(N,CX,1,CY,1) +#endif + end function V_dot_V_r2_gpu +#endif ! complex(SP) function V_dot_V_c1_cpu(N,CX,CY) implicit none integer, intent(in) :: N - complex(SP),intent(in) :: CX(*),CY(*) + complex(SP),intent(in) :: CX(:),CY(:) #if defined _DOUBLE complex(SP)::zdotu V_dot_V_c1_cpu=ZDOTU(N,CX,1,CY,1) @@ -471,12 +531,18 @@ end function V_dot_V_c1_cpu complex(SP) function V_dot_V_c2_cpu(N,CX,CY) implicit none integer, intent(in) :: N - complex(SP),intent(in) :: CX(:,:),CY(:,:) + complex(SP),target,intent(in) :: CX(:,:),CY(:,:) #if defined _DOUBLE complex(SP)::zdotu - V_dot_V_c2_cpu=ZDOTU(N,CX,1,CY,1) #else complex(SP)::cdotu +#endif + !complex(SP),pointer :: CX1D(:),CY1D(:) + !call C_F_POINTER(C_LOC(CX),CX1D,(/N/)) + !call C_F_POINTER(C_LOC(CY),CY1D,(/N/)) +#if defined _DOUBLE + V_dot_V_c2_cpu=ZDOTU(N,CX,1,CY,1) +#else V_dot_V_c2_cpu=CDOTU(N,CX,1,CY,1) #endif end function V_dot_V_c2_cpu @@ -485,7 +551,7 @@ end function V_dot_V_c2_cpu complex(SP) function V_dot_V_c1_gpu(N,CX,CY) implicit none integer, intent(in) :: N - complex(SP), device, intent(in) :: CX(*),CY(*) + complex(SP), device, intent(in) :: CX(:),CY(:) #if defined _DOUBLE complex(SP)::zdotu V_dot_V_c1_gpu=cublasZDOTU(N,CX,1,CY,1) @@ -494,6 +560,25 @@ complex(SP) function V_dot_V_c1_gpu(N,CX,CY) V_dot_V_c1_gpu=cublasCDOTU(N,CX,1,CY,1) #endif end function V_dot_V_c1_gpu + ! + complex(SP) function V_dot_V_c2_gpu(N,CX,CY) + implicit none + integer, intent(in) :: N + complex(SP), device, target, intent(in) :: CX(:,:),CY(:,:) +#if defined _DOUBLE + complex(SP)::zdotu +#else + complex(SP)::cdotu +#endif + !complex(SP),pointer :: CX1D(:),CY1D(:) + !call C_F_POINTER(C_LOC(CX),CX1D,(/N/)) + !call C_F_POINTER(C_LOC(CY),CY1D,(/N/)) +#if defined _DOUBLE + V_dot_V_c2_gpu=cublasZDOTU(N,CX,1,CY,1) +#else + V_dot_V_c2_gpu=cublasCDOTU(N,CX,1,CY,1) +#endif + end function V_dot_V_c2_gpu #endif ! !============== diff --git a/src/modules/mod_wrapper_omp.F b/src/modules/mod_wrapper_omp.F index 47669d8ee9..029ffdd528 100644 --- a/src/modules/mod_wrapper_omp.F +++ b/src/modules/mod_wrapper_omp.F @@ -43,6 +43,10 @@ module wrapper_omp module procedure V_dot_V_r1_omp, V_dot_V_c1_omp, V_dot_V_c2_omp end interface ! + interface Vstar_dot_V_omp + module procedure V_dot_V_r1_omp, Vstar_dot_V_c1_omp, Vstar_dot_V_c2_omp + end interface + ! public :: V_copy_omp public :: Vstar_dot_V_omp public :: V_dot_V_omp @@ -283,9 +287,9 @@ end subroutine V_copy_omp ! DOT PRODUCTS !============== ! - complex(SP) function Vstar_dot_V_omp(N,CX,CY) + complex(SP) function Vstar_dot_V_c1_omp(N,CX,CY) integer, intent(in) :: N - complex(SP),intent(in) :: CX(*),CY(*) + complex(SP),intent(in) :: CX(:),CY(:) ! #if defined _OPENMP_INT_LINALG integer :: i @@ -297,19 +301,45 @@ complex(SP) function Vstar_dot_V_omp(N,CX,CY) ctmp=ctmp +conjg(CX(i))*CY(i) enddo !$omp end parallel do - Vstar_dot_V_omp=ctmp + Vstar_dot_V_c1_omp=ctmp + else + Vstar_dot_V_c1_omp=Vstar_dot_V(N,CX,CY) + endif +#else + Vstar_dot_V_c1_omp=Vstar_dot_V(N,CX,CY) +#endif + ! + end function Vstar_dot_V_c1_omp + ! + complex(SP) function Vstar_dot_V_c2_omp(N,CX,CY) + integer, intent(in) :: N + complex(SP),intent(in) :: CX(:,:),CY(:,:) + ! +#if defined _OPENMP_INT_LINALG + integer :: i,j + complex(SP) :: ctmp + if (n_threads_now>1) then + ctmp=0.0_SP + !$omp parallel do default(shared), private(i), reduction(+:ctmp), collapse(2) + do j=1,N/size(CX,1) + do i=1,size(CX,1) + ctmp=ctmp +conjg(CX(i,j))*CY(i,j) + enddo + enddo + !$omp end parallel do + Vstar_dot_V_c2_omp=ctmp else - Vstar_dot_V_omp=Vstar_dot_V(N,CX,CY) + Vstar_dot_V_c2_omp=Vstar_dot_V(N,CX,CY) endif #else - Vstar_dot_V_omp=Vstar_dot_V(N,CX,CY) + Vstar_dot_V_c2_omp=Vstar_dot_V(N,CX,CY) #endif ! - end function Vstar_dot_V_omp + end function Vstar_dot_V_c2_omp ! real(SP) function V_dot_V_r1_omp(N,CX,CY) integer, intent(in) :: N - real(SP),intent(in) :: CX(*),CY(*) + real(SP),intent(in) :: CX(:),CY(:) ! #if defined _OPENMP_INT_LINALG integer :: i @@ -332,7 +362,7 @@ end function V_dot_V_r1_omp ! complex(SP) function V_dot_V_c1_omp(N,CX,CY) integer, intent(in) :: N - complex(SP),intent(in) :: CX(*),CY(*) + complex(SP),intent(in) :: CX(:),CY(:) ! #if defined _OPENMP_INT_LINALG integer :: i diff --git a/src/nloptics/.objects b/src/nloptics/.objects index 1de603485f..708eaa3f0d 100644 --- a/src/nloptics/.objects +++ b/src/nloptics/.objects @@ -1,5 +1,6 @@ #if defined _NL -objs = NL_driver.o NL_initialize.o NL_start_and_restart.o el_density_vbands.o NL_Integrator.o NL_damping.o \ +objs = NL_driver.o NL_initialize.o NL_start_and_restart.o el_density_vbands.o NL_Integrator.o \ + NL_damping.o NL_test_collisions.o \ NL_Hamiltonian.o NL_output.o DIP_polarization.o NL_average_operator.o EXC_macroscopic_JGM.o \ NL_databases_IO.o NL_build_dG_lesser.o NL_build_valence_bands.o EXP_step.o \ NL_average_operator.o NL_current.o NL_Berry_current.o EXP_op.o INVINT_step.o RK_basestep.o diff --git a/src/nloptics/DOUBLE_project.dep b/src/nloptics/DOUBLE_project.dep index fbac890c90..9c580b7e80 100644 --- a/src/nloptics/DOUBLE_project.dep +++ b/src/nloptics/DOUBLE_project.dep @@ -16,6 +16,7 @@ NL_initialize.o NL_output.o NL_start_and_restart.o + NL_test_collisions.o RK_basestep.o el_density_vbands.o diff --git a/src/nloptics/NL_Hamiltonian.F b/src/nloptics/NL_Hamiltonian.F index b7df9a6327..d8988afed1 100644 --- a/src/nloptics/NL_Hamiltonian.F +++ b/src/nloptics/NL_Hamiltonian.F @@ -27,7 +27,7 @@ subroutine NL_Hamiltonian(E,k,q,X,Time,i_time,V_bands) use xc_functionals, ONLY:V_xc,XC_potential_driver use nl_optics, ONLY:Correlation,V_xc_0,full_rho,IPA,E_full,LRC,JGM,LRCW,l_use_DIPOLES,NL_LRC_alpha, & & NL_initial_P,E_tot,E_ext,E_ks,E_xc_0,I_relax,Alpha_ED,dG,eval_dG, & -& NL_bands,VAL_BANDS,NL_P +& NL_bands,VAL_BANDS,NL_P,LSEX,LHF use global_XC, ONLY:WF_xc_functional,WF_kind use parallel_m, ONLY:PAR_IND_Xk_ibz,PAR_Xk_ibz_index use wrapper_omp, ONLY:M_plus_alpha_M_omp @@ -169,8 +169,10 @@ subroutine NL_Hamiltonian(E,k,q,X,Time,i_time,V_bands) ! Build the Hamiltonian ! ===================== ! +!$OMP WORKSHARE Ho_plus_Sigma=cZERO I_relax =cZERO +!$OMP END WORKSHARE ! do i_sp_pol=1,n_sp_pol do ik=1,QP_nk @@ -217,10 +219,12 @@ subroutine NL_Hamiltonian(E,k,q,X,Time,i_time,V_bands) ! endif ! +! if(Correlation==LSEX.or.Correlation==LHF) call OSCLL_compose_nl(dG,H_nl_sc,k,q,E,ik,i_sp_pol) + if(Correlation==LSEX.or.Correlation==LHF) call OSCLL_compose_vbands(V_bands,H_nl_sc,k,q,E,ik,i_sp_pol) + ! if(l_use_Hxc_collisions) then call M_plus_alpha_M_omp(RT_nbands,cONE,RT_Vnl_xc(NL_bands(1):NL_bands(2),NL_bands(1):NL_bands(2),ik,i_sp_pol),& & H_nl_sc(NL_bands(1):NL_bands(2),NL_bands(1):NL_bands(2))) - endif ! ! Damping: NL_damping fills the I_relax matrix diff --git a/src/nloptics/NL_driver.F b/src/nloptics/NL_driver.F index 5e8132231f..cba5a617ea 100644 --- a/src/nloptics/NL_driver.F +++ b/src/nloptics/NL_driver.F @@ -5,7 +5,7 @@ ! ! Authors (see AUTHORS file for details): MG CA ! -subroutine NL_driver(E,X,k,q,Dip) +subroutine NL_driver(E,X,Xw,k,q,Dip) ! use pars, ONLY:pi,SP,cZERO,cONE,schlen use vec_operate, ONLY:axis_rotation @@ -14,7 +14,7 @@ subroutine NL_driver(E,X,k,q,Dip) use QP_m, ONLY: QP_ng_SH,QP_ng_Sx use LIVE_t, ONLY:live_timing use electrons, ONLY:levels,n_sp_pol - use collision_ext, ONLY:COLLISIONS_CV_only + use collision_ext, ONLY:COLLISIONS_CV_only,HXC_collisions use X_m, ONLY:X_t use DIPOLES, ONLY:DIPOLE_t,DIP_alloc,DIP_S use wave_func, ONLY:WF,wf_ng @@ -22,16 +22,18 @@ subroutine NL_driver(E,X,k,q,Dip) use hamiltonian, ONLY:WF_Go_indx,H_QP_table_setup,H_potential use R_lattice, ONLY:bz_samp,qindx_free use fields, ONLY:Efield + use frequency, ONLY:w_samp use QP_CTL_m, ONLY:QP_apply + use QP_m, ONLY:QP_nk use nl_optics, ONLY:n_frequencies,Correlation,IPA,NL_alloc,NL_free,V_bands,eval_dG,NL_J,NL_initial_J,NL_initial_versor, & & NL_estep,NL_er,Integrator,l_use_DIPOLES,NL_P_t,JGM,TDDFT,NL_P,loop_on_angles,n_angles,& & E_ext_t,E_tot_t,E_ks_t,E_ks,E_tot,E_ext,runs_done,n_runs_todo,l_eval_CURRENT,NL_rot_axis,& & HF,SEX,NL_bands,NL_nbands,NL_alloc_k_distributed,NL_J_t,n_runs,loop_on_frequencies, & -& l_pump_and_probe +& l_pump_and_probe,eval_OSCLL,l_test_OSCLL,LSEX,LHF,Sigma_SEX_EQ use real_time, ONLY:RT_step,NE_steps,NE_i_time,RT_nbands,RT_bands,eval_DFT,eval_HARTREE use IO_m, ONLY:io_COLLs use parallel_int, ONLY:PARALLEL_global_indexes,PARALLEL_WF_distribute,PARALLEL_WF_index - use parallel_m, ONLY:PAR_IND_freqs,PAR_IND_freqs_ID,PAR_IND_Xk_ibz + use parallel_m, ONLY:PAR_IND_freqs,PAR_IND_freqs_ID,PAR_IND_Xk_ibz,PAR_Xk_ibz_index use collision_ext, ONLY:COLLISIONS_have_HARTREE ! #include @@ -42,12 +44,13 @@ subroutine NL_driver(E,X,k,q,Dip) type(bz_samp) :: k,q type(X_t) :: X(2) type(DIPOLE_t) :: Dip + type(w_samp) :: Xw(2) ! logical, external :: stop_now ! ! Work Space ! - integer :: i_f,i1,i_sp_pol,wf_ng_save,io_err + integer :: i_f,i1,i_sp_pol,wf_ng_save,io_err,ik,ik_mem character(schlen) :: l_message logical :: l_dynamics_is_over integer :: ID_RESTART @@ -79,7 +82,8 @@ subroutine NL_driver(E,X,k,q,Dip) ! ! Dipoles & WF dimensions !========================= - call WF_and_dipole_dimensions(X,k) + + call WF_and_dipole_dimensions(X(1),k) ! ! QP dimensions !================ @@ -87,10 +91,7 @@ subroutine NL_driver(E,X,k,q,Dip) ! ! Non_Linear parallization ! =================================== - call PARALLEL_global_indexes(E,k,q," ",RESET=.TRUE.,Dip=Dip,X=X(1)) - call PARALLEL_global_indexes(E,k,q,"Non_Linear",Dip=Dip,X=X(1)) - call PARALLEL_WF_distribute(K_index=PAR_IND_Xk_ibz,CLEAN_UP=.TRUE.) - call PARALLEL_WF_index( ) + call setup_NL_parallelization() ! !Load Dipole after the NL parallelization ! ======================================== @@ -100,11 +101,16 @@ subroutine NL_driver(E,X,k,q,Dip) ! ! Generate bare_qpg if required !============================= - if(eval_HARTREE.or.CORRELATION==JGM) call col_driver(maxval((/wf_ng,QP_ng_Sx,QP_ng_SH/)),q) + if(eval_HARTREE.or.CORRELATION==JGM.or.CORRELATION==LSEX.or.CORRELATION==LHF) & +& call col_driver(maxval((/wf_ng,QP_ng_Sx,QP_ng_SH/)),q) ! ! Free memory: qindx_* and unused DIPOLES ! ===================================================== - call qindx_free('ALL') + if(.not.eval_OSCLL) then + call qindx_free('ALL') + else + call qindx_free('X B C') + endif ! call DIP_alloc('P_square') if(l_use_DIPOLES) then @@ -122,16 +128,26 @@ subroutine NL_driver(E,X,k,q,Dip) wf_ng_save =wf_ng if(eval_DFT) then wf_ng =max(QP_ng_Sx,QP_ng_SH) + elseif(eval_OSCLL) then + wf_ng =max(wf_ng,QP_ng_Sx,QP_ng_SH) else wf_ng =QP_ng_SH endif WF_Go_indx =1 ! + if(eval_OSCLL.and..not.l_test_OSCLL) then + io_COLLs=.false. + HXC_collisions%N=0 + endif + ! if(io_COLLs) then ! ! check if they exist ! call COLLISIONS_load(.true.) + ! + if(l_test_OSCLL.and..not.l_use_Hxc_collisions) & +& call error("You should calculate collisions before the testing with: yambo_nl -e -v f/sex") ! if(.not.l_use_Hxc_collisions .and. (Correlation==SEX.or.Correlation==HF)) & & call error(' HF / SEX need Collisions. Cannot be computed on the fly.') @@ -152,8 +168,27 @@ subroutine NL_driver(E,X,k,q,Dip) ! endif ! - if(eval_HARTREE.or.eval_DFT) & -& call WF_load(WF,wf_ng,WF_Go_indx,(/1,NL_bands(2)/),(/1,k%nibz/),space='R',title='-NL') + if(eval_OSCLL) then + ! + call setup_OSCLL_parallelization() + ! + ! Evaluate oscillators if not present + call OSCLL_eval(q, k, E) + ! + ! Load the LSEX potential W(q,G,G') or V(q,G,G') + call LSEX_potential(q, X(2), Xw(2)) + ! + ! Load oscillators + ! + call OSCLL_load(q,k) + ! + call setup_NL_parallelization() + ! + endif + ! + if(eval_HARTREE.or.eval_DFT) then + call WF_load(WF,wf_ng,WF_Go_indx,(/1,NL_bands(2)/),(/1,k%nibz/),space='R',title='-NL') + endif ! ! Load Collisions if necessary ! ============================ @@ -171,11 +206,33 @@ subroutine NL_driver(E,X,k,q,Dip) call NL_alloc(E) call NL_alloc_k_distributed(E) ! + if(l_test_OSCLL) then + call NL_test_collisions(k,q) + return + endif + ! ! Initialization ! ================== call section('*','NL Start and Restart') call NL_start_and_restart(E,k,Dip) ! + ! + if(eval_OSCLL) then + ! + ! If I use oscillators I store the equilibrium part of long-range SEX + ! + Sigma_SEX_EQ=cZERO + ! + do ik=1,QP_nk + if (.not.PAR_IND_Xk_ibz%element_1D(ik)) cycle + ik_mem=PAR_Xk_ibz_index(ik) + do i_sp_pol=1,n_sp_pol + call OSCLL_compose_vbands(V_bands,Sigma_SEX_EQ(:,:,i_sp_pol,ik_mem),k,q,E,ik,i_sp_pol) + enddo + enddo + ! + endif + ! ! Check if calculation was already done ! ===================================== if(n_runs_todo==0) return @@ -278,4 +335,40 @@ subroutine NL_driver(E,X,k,q,Dip) call DIP_alloc('DIP_R') call DIP_alloc('DIP_v') ! + contains + ! + subroutine setup_OSCLL_parallelization + use parallel_m, ONLY:PAR_IND_G_b + implicit none + ! + ! Restore original number of bands + ! + X(1)%ib =(/NL_bands(1),NL_bands(2)/) + ! + call PARALLEL_global_indexes(E,k,q," ",RESET=.TRUE.,X=X(1)) + call PARALLEL_global_indexes(E,k,q,"Oscillators",X=X(1)) + call PARALLEL_WF_distribute(B_index=PAR_IND_G_b,CLEAN_UP=.TRUE.) + call PARALLEL_WF_index( ) + ! + end subroutine setup_OSCLL_parallelization + ! + subroutine setup_NL_parallelization() + use parallel_m, ONLY:PAR_IND_Xk_ibz + ! + implicit none + ! + ! Restore original number of bands + ! + X(1)%ib =(/NL_bands(1),NL_bands(2)/) + ! + call PARALLEL_global_indexes(E,k,q," ",RESET=.TRUE.) + call PARALLEL_WF_distribute(CLEAN_UP=.TRUE.) + ! + call PARALLEL_global_indexes(E,k,q,"Non_Linear",Dip=Dip,X=X(1)) + call PARALLEL_WF_distribute(K_index=PAR_IND_Xk_ibz,CLEAN_UP=.TRUE.) + call PARALLEL_WF_index( ) + ! + end subroutine setup_NL_parallelization + + ! end subroutine NL_driver diff --git a/src/nloptics/NL_initialize.F b/src/nloptics/NL_initialize.F index 4e0291123f..dadb8ffdbd 100644 --- a/src/nloptics/NL_initialize.F +++ b/src/nloptics/NL_initialize.F @@ -17,15 +17,17 @@ subroutine NL_initialize(E,k) use X_m, ONLY:global_gauge use zeros, ONLY:zero_dfl use com, ONLY:msg + use QP_m, ONLY:QP_ng_Sc,QP_ng_Sx,QP_ng_Sh + use collision_ext, ONLY:ng_oscll use R_lattice, ONLY:bz_samp,k_map use fields, ONLY:Efield,Efield_strength,n_ext_fields,EtoT use real_time, ONLY:RT_step,NE_steps,NE_tot_time,eval_HARTREE,eval_DFT,l_RT_induced_field,l_NE_with_fields, & & Phase_LifeTime,Integrator_name,RT_dyn_step,NE_i_time,RT_potential,RAD_LifeTime use RT_control, ONLY:SETUP_RT_IO_type,OBS_RT_IO_t use nl_optics, ONLY:l_use_DIPOLES,EULER,EULEREXP,RK2,RK4,RK2EXP,RK4EXP,HEUN,INVINT,Integrator,CRANK_NICOLSON, & -& NL_damping,NL_correlation,Correlation,NL_er,NL_estep,n_frequencies,IPA,TDDFT,HARTREE,LRC,JGM, & -& LRCW,SEX,HF,eval_dG,eval_COLLISIONS,NL_bands,NL_verb_name,VERB_HIGH,VERB_LOW,NL_verbosity, & -& l_eval_CURRENT,l_pump_and_probe,DephMult,l_velocity_IPA,n_angles, & +& NL_damping,NL_correlation,Correlation,NL_er,NL_estep,n_frequencies,IPA,TDDFT,HARTREE,LRC,JGM, & +& LRCW,SEX,HF,eval_dG,eval_COLLISIONS,NL_bands,NL_verb_name,VERB_HIGH,VERB_LOW,NL_verbosity,LSEX,& +& l_eval_CURRENT,l_pump_and_probe,DephMult,l_velocity_IPA,n_angles,l_test_OSCLL,LHF,eval_OSCLL, & & NL_ang_step,n_runs,NL_rot_axis,loop_on_frequencies,loop_on_angles,NL_initial_versor use electric, ONLY:l_force_SndOrd,l_P_periodic use parser_m, ONLY:parser @@ -50,14 +52,11 @@ subroutine NL_initialize(E,k) call parser('InducedField' , l_RT_induced_field) call parser('EvalCurrent' , l_eval_CURRENT) call parser('FrPolPerdic' , l_P_periodic) + call parser('TestOSCLL', l_test_OSCLL) call parser('NoComprCOLL', l_no_compress_COLL) ! - COLLISIONS_load_SP =.not.l_no_compress_COLL - COLLISIONS_compr =.not.l_no_compress_COLL - ! if(l_use_DIPOLES) call warning(' Fixed dipoles: only linear response properties are correct!') if(l_P_periodic) call msg('sr','[NL] Enforce periodicity of the polarization repect to Efield') - if(SP==DP.and.COLLISIONS_load_SP) call msg('sr','COLLISIONS in single precisions') ! ! Velocity gauge mode ! ======================================= @@ -281,6 +280,7 @@ subroutine NL_initialize(E,k) eval_DFT =.false. eval_dG =.false. eval_COLLISIONS=.false. + eval_OSCLL =.false. ! ! NB.: This part duplicates what is in ! INIT.F @@ -322,6 +322,22 @@ subroutine NL_initialize(E,k) eval_COLLISIONS=.true. l_sc_coh =.true. l_sc_sex =.true. + case('LSEX') + Correlation = LSEX + RT_potential ='LSEX' + eval_HARTREE =.true. + ! Turn off Hartree if required + if(QP_ng_SH<=1) eval_HARTREE=.false. + eval_OSCLL =.true. + eval_dG =.false. + case('LHF') + Correlation = LHF + RT_potential ='LHF' + eval_HARTREE =.true. + ! Turn off Hartree if required + if(QP_ng_SH<=1) eval_HARTREE=.false. + eval_OSCLL =.true. + eval_dG =.false. case('HF') Correlation=HF RT_potential ='HARTREE+FOCK' @@ -334,6 +350,50 @@ subroutine NL_initialize(E,k) call error("Unknow correlation in the response function") end select ! + call msg('rs','Correlation in the reponse functions: '//trim(NL_correlation)) + ! + if(l_test_OSCLL.and.(Correlation==LSEX.or.Correlation==LHF)) then + ! + eval_COLLISIONS=.TRUE. + eval_dG =.TRUE. + ! + ! I force loading collision for testing porpouse + ! + if(Correlation==LHF) then + RT_potential='FOCK' + elseif(Correlation==LSEX) then + RT_potential='SEX' + endif + ! + call msg('s','') + call msg('s','*****************************************') + call msg('s','**** TEST TEST Oscillators TEST TEST ****') + call msg('s','*****************************************') + call msg('s','') + ! + endif + ! + if(eval_COLLISIONS) then + COLLISIONS_load_SP =.not.l_no_compress_COLL + COLLISIONS_compr =.not.l_no_compress_COLL + if(SP==DP.and.COLLISIONS_load_SP) call msg('sr','COLLISIONS in single precisions') + endif + ! + ! Set the number of g-vectors for oscillators and W(q,G,G') or V(q,G) + ! + if(Correlation==LHF.or.Correlation==LSEX) then + if(Correlation==LSEX.and.QP_ng_Sx/=QP_ng_Sc) & + & call error(' EXXRLvcs /= CORRLvcs not possible with oscillator, set the same value in input') + ng_oscll=QP_ng_Sx + call msg('rs','Number of G-vectors in oscillators :',ng_oscll) + endif + ! + if(QP_ng_SH<=1) then + call msg('rs','Hartree term turned off ') + else + call msg('rs','Number of G-vectors in Hartree :',QP_ng_SH) + endif + ! if(eval_COLLISIONS) then ! if(COLLISIONS_CV_only) call warning(' ONLY cv scattering in COLLISIONS! ') @@ -345,7 +405,6 @@ subroutine NL_initialize(E,k) endif endif ! - ! H_potential = RT_potential ! ! Frequencies or Angle range diff --git a/src/nloptics/NL_project.dep b/src/nloptics/NL_project.dep index fbac890c90..9c580b7e80 100644 --- a/src/nloptics/NL_project.dep +++ b/src/nloptics/NL_project.dep @@ -16,6 +16,7 @@ NL_initialize.o NL_output.o NL_start_and_restart.o + NL_test_collisions.o RK_basestep.o el_density_vbands.o diff --git a/src/nloptics/NL_start_and_restart.F b/src/nloptics/NL_start_and_restart.F index 9c4f8c0000..59521e3339 100644 --- a/src/nloptics/NL_start_and_restart.F +++ b/src/nloptics/NL_start_and_restart.F @@ -20,7 +20,7 @@ subroutine NL_start_and_restart(E,k,Dip) use pars, ONLY:cZERO,rZERO,cONE,lchlen use com, ONLY:msg use electrons, ONLY:levels,n_sp_pol - use real_time, ONLY:rho_reference + use real_time, ONLY:rho_reference,eval_HARTREE,eval_DFT use R_lattice, ONLY:bz_samp use X_m, ONLY:X_t use DIPOLES, ONLY:DIPOLE_t @@ -71,7 +71,7 @@ subroutine NL_start_and_restart(E,k,Dip) forall(i1=1:E%nbf(n_sp_pol)) V_bands(i1,i1,:,i_sp_pol)=cONE enddo ! - if(Correlation/=IPA.and..not.COLLISIONS_have_HARTREE) then + if(Correlation/=IPA.and..not.COLLISIONS_have_HARTREE.and.(eval_HARTREE.or.eval_DFT)) then ! call NL_build_valence_bands(E,V_bands,VAL_BANDS,1) call el_density_vbands(E,k,full_rho,VAL_BANDS) diff --git a/src/nloptics/NL_test_collisions.F b/src/nloptics/NL_test_collisions.F new file mode 100644 index 0000000000..c1a8b93a24 --- /dev/null +++ b/src/nloptics/NL_test_collisions.F @@ -0,0 +1,92 @@ +! +! Copyright (C) 2000-2017 the LUMEN team +! +! Authors (see AUTHORS file for details): MG CA +! +! This file is distributed under the terms of the GNU +! General Public License. You can redistribute it and/or +! modify it under the terms of the GNU General Public +! License as published by the Free Software Foundation; +! either version 2, or (at your option) any later version. +! +! This program is distributed in the hope that it will +! be useful, but WITHOUT ANY WARRANTY; without even the +! implied warranty of MERCHANTABILITY or FITNESS FOR A +! PARTICULAR PURPOSE. See the GNU General Public License +! for more details. +! +! You should have received a copy of the GNU General Public +! License along with this program; if not, write to the Free +! Software Foundation, Inc., 59 Temple Place - Suite 330,Boston, +! MA 02111-1307, USA or visit http://www.gnu.org/copyleft/gpl.txt. +! +subroutine NL_test_collisions(k,q) + ! + ! Test-collisions built on the fly from V/W + ! VS + ! collisions stored on disk (yambo_nl -e -v sex/f) + ! + use pars, ONLY:cZERO,SP,rZERO + use R_lattice, ONLY:nXkibz,bz_samp,qindx_S + use collision_ext, ONLY:HXC_COLL_element,HXC_collisions + use nl_optics, ONLY:NL_bands + use com, ONLY:msg + ! + implicit none + ! + type(bz_samp), intent(in) :: k,q + ! + integer :: i_kmq,i_qp,ib,ibp,i_coll,i_coll_mem + integer :: i_k,i_n,i_m,i_spin + integer :: ic1 + complex(SP) :: COLL(NL_bands(1):NL_bands(2),NL_bands(1):NL_bands(2),nXkibz) + complex(SP) :: COLL_OSCLL(NL_bands(1):NL_bands(2),NL_bands(1):NL_bands(2),nXkibz) + ! + real(SP) :: max_diff,diff + ! + call msg('s','Collisions Test COLL vs OSCLL') + call msg('s','') + ! + max_diff=rZERO + ! + do i_coll=1,HXC_collisions%N + ! + i_k =HXC_collisions%state(i_coll,3) + i_n =HXC_collisions%state(i_coll,1) + i_m =HXC_collisions%state(i_coll,2) + i_spin =HXC_collisions%state(i_coll,4) + i_coll_mem=HXC_collisions%PAR_map(i_coll) + ! + ! Spin not supported for the moment + ! + call OSCLL_compose_collision(k,q,i_n,i_m,i_k,COLL_OSCLL) + ! + ic1=1 + ! + COLL=cZERO + ! + do i_kmq=1,nXkibz + ! + do ibp=NL_bands(1),NL_bands(2) + do ib=NL_bands(1),NL_bands(2) + if (HXC_COLL_element(i_coll_mem)%table(ib,ibp,i_kmq)=="t") then + COLL(ib,ibp,i_kmq)=HXC_COLL_element(i_coll_mem)%v_c(ic1) + ic1=ic1+1 + endif + ! + diff=abs(COLL(ib,ibp,i_kmq)-COLL_OSCLL(ib,ibp,i_kmq)) + write(*,'(6i5,2e14.6)') i_k,i_m,i_n,i_kmq,ib,ibp,abs(COLL(ib,ibp,i_kmq)),abs(COLL_OSCLL(ib,ibp,i_kmq)) + if(max_diff<=diff) max_diff=diff + ! + enddo + enddo + ! + enddo + ! + enddo + ! + call msg('s','') + call msg('s','Max error in collisions : ',max_diff) + call msg('s','') + ! +end subroutine NL_test_collisions diff --git a/src/nloptics/el_density_vbands.F b/src/nloptics/el_density_vbands.F index c2f6a35aae..896f4b373b 100644 --- a/src/nloptics/el_density_vbands.F +++ b/src/nloptics/el_density_vbands.F @@ -47,21 +47,23 @@ subroutine el_density_vbands(en,Xk,rho,VAL_BANDS) integer :: i1,ik,is,rho_syms,ir,ik_mem real(SP) :: rho_no_sym(fft_size) ! +!$OMP WORKSHARE rho=rZERO rho_no_sym=rZERO +!$OMP END WORKSHARE ! do ik=1,Xk%nibz ! if (.not.PAR_IND_Xk_ibz%element_1D(ik)) cycle ik_mem=PAR_Xk_ibz_index(ik) ! -!$omp parallel do default(shared), private(ir,is) - do ir=1,fft_size - do is=1,n_sp_pol + do is=1,n_sp_pol +!$OMP WORKSHARE + forall(ir=1:fft_size) rho_no_sym(ir)=rho_no_sym(ir)+real(spin_occ,SP)*Xk%weights(ik)*sum(abs(VAL_BANDS(ir,:en%nbf(is),ik_mem,is))**2._SP) - enddo + end forall +!$OMP END WORKSHARE enddo -!$omp end parallel do ! enddo ! @@ -69,20 +71,18 @@ subroutine el_density_vbands(en,Xk,rho,VAL_BANDS) ! ! Simmetrization ! +!$OMP WORKSHARE rho_syms=nsym/(i_time_rev+1) +!$OMP END WORKSHARE ! -!$omp parallel do default(shared), private(ir,i1) - do ir=1,fft_size - do i1=1,rho_syms - rho(ir)=rho(ir)+real(rho_no_sym(fft_rot_r(ir,i1)),SP)/real(nsym,SP) - enddo - enddo -!$omp end parallel do +!$OMP WORKSHARE + forall(ir=1:fft_size) + rho(ir)=rho(ir)+sum(rho_no_sym(fft_rot_r(ir,1:rho_syms)))/real(nsym,SP) + end forall +!$OMP END WORKSHARE ! -!$omp parallel do default(shared), private(ir) - do ir=1,fft_size - rho(ir)=(1._SP+i_time_rev)*rho(ir) - enddo -!$omp end parallel do +!$OMP WORKSHARE + rho=real(1._SP+i_time_rev,SP)*rho +!$OMP END WORKSHARE ! end subroutine diff --git a/src/output/K_OUTPUT.F b/src/output/K_OUTPUT.F index feb4ac07ee..25dad42431 100644 --- a/src/output/K_OUTPUT.F +++ b/src/output/K_OUTPUT.F @@ -14,12 +14,12 @@ subroutine K_OUTPUT(iq,W,WHAT,OBS) & diam_term_exact,para_term_w0,l_abs_prop_chi_bse,& & l_eels_from_inversion,Co_factor use BS, ONLY:BSE_mode,BS_K_is_ALDA,BS_H_dim,BS_K_coupling,& -& l_BS_anomalous_Hall +& l_BS_anomalous_Hall,l_BS_Esort_indx use stderr, ONLY:STRING_match use X_m, ONLY:Epsilon_ii,eps_2_alpha,X_OUTPUT_driver,Epsilon_ij,& & X_do_obs,X_OUTPUT_messages,X_dichroism,Joint_DOS,BS_E_sorted,N_BS_E_sorted,& -& X_magnons,i_G_shift,Q_plus_G_pt,Q_plus_G_sq_modulus,Resp_ii,Resp_ij,& -& l_drude,skip_cond_sum_rule,X_drude_term,global_gauge +& BS_E_sorted_indx,X_magnons,i_G_shift,Q_plus_G_pt,Q_plus_G_sq_modulus,& +& Resp_ii,Resp_ij,l_drude,skip_cond_sum_rule,X_drude_term,global_gauge use com, ONLY:msg,com_compose_msg use R_lattice, ONLY:bare_qpg,FineGd_desc,nkbz use electrons, ONLY:spin_occ @@ -35,7 +35,7 @@ subroutine K_OUTPUT(iq,W,WHAT,OBS) ! Work Space ! character(2) :: ordering - integer :: id,i_rsp,n_resp,iw_ref + integer :: id,it,i_rsp,n_resp,iw_ref,index_sort(5) character(10) :: solver character(schlen) :: out_string complex(SP) :: hall(3,2),tmp_var(BSS_n_freqs,3),& @@ -246,6 +246,18 @@ subroutine K_OUTPUT(iq,W,WHAT,OBS) if (data_sort(1)==0._SP) cycle call X_OUTPUT_driver("WRITE E_IP",data=data_sort(:N_BS_E_sorted)) enddo + if (l_BS_esort_indx) then + do id=1,min(BS_H_dim,1000) + data_sort(1)=real(BS_E_sorted(id,1,1),SP) + if (data_sort(1)==0._SP) cycle + do it=1,18 + if (BS_E_sorted_indx(id,it,1)==0) exit + data_sort(1)=real(BS_E_sorted(id,1,1),SP) + index_sort(1:5)=real(BS_E_sorted_indx(id,it,1:5),SP) + call X_OUTPUT_driver("WRITE E_INDX_IP",data=data_sort(1:1),indexes=index_sort) + enddo + enddo + endif endif ! ! E sorted diff --git a/src/output/X_OUTPUT_driver.F b/src/output/X_OUTPUT_driver.F index d35a546030..de20ff34f0 100644 --- a/src/output/X_OUTPUT_driver.F +++ b/src/output/X_OUTPUT_driver.F @@ -5,7 +5,7 @@ ! ! Authors (see AUTHORS file for details): AM ! -subroutine X_OUTPUT_driver(WHAT,keys,solver,IQ,IG,DESC,data) +subroutine X_OUTPUT_driver(WHAT,keys,solver,IQ,IG,DESC,data,indexes) ! use pars, ONLY:schlen,SP use D_lattice, ONLY:l_0D,l_1D,l_2D @@ -14,7 +14,7 @@ subroutine X_OUTPUT_driver(WHAT,keys,solver,IQ,IG,DESC,data) use electrons, ONLY:n_spinor use PHOTOLUM, ONLY:PL use BS, ONLY:l_BS_kerr,l_BS_magnons,l_BS_dichroism,BS_K_coupling,& -& l_BS_photolum,l_BS_abs,l_BS_jdos,l_BS_esort +& l_BS_photolum,l_BS_abs,l_BS_jdos,l_BS_esort_indx,l_BS_esort use BS_solvers, ONLY:l_abs_prop_chi_bse,l_eels_can_be_computed use stderr, ONLY:STRING_match,STRING_split,intc,STRING_remove use OUTPUT, ONLY:OUTPUT_driver @@ -30,6 +30,7 @@ subroutine X_OUTPUT_driver(WHAT,keys,solver,IQ,IG,DESC,data) integer, optional :: IQ,IG type(IO_desc),optional :: DESC real(SP), optional :: data(:) + integer, optional :: indexes(:) ! ! Work Space integer ::it,itp,ik,ic @@ -39,20 +40,21 @@ subroutine X_OUTPUT_driver(WHAT,keys,solver,IQ,IG,DESC,data) ! if (STRING_match(WHAT,"DEF")) then call ELEMENTAL_init(1,"fxc","Exchange-Correlation Kernel","optics") - call ELEMENTAL_init(2,"E_IP","Sorted IP Energies","sort") - call ELEMENTAL_init(3,"Esort","Sorted Interacting Energies","sort") - call ELEMENTAL_init(4,"pl","Photoluminescence","PL") - call ELEMENTAL_init(5,"eps","Absorption","optics") - call ELEMENTAL_init(6,"refl","Reflectivity","optics") - call ELEMENTAL_init(7,"eel","Electron Energy Loss","optics") - call ELEMENTAL_init(8,"alpha","Polarizability ( -<> )","optics") - call ELEMENTAL_init(9,"off","Off-diagonal Absorption","kerr") - call ELEMENTAL_init(10,"beta","Off-diagonal Polarizability","kerr") - call ELEMENTAL_init(11,"moke","MOKE parameters","kerr") - call ELEMENTAL_init(12,"jdos","Joint Density of states","optics") - call ELEMENTAL_init(13,"CD","Dichroism","dichroism") - call ELEMENTAL_init(14,"Spm","Magnons (S+-)","magnons") - call ELEMENTAL_init(15,"Smp","Magnons (S-p)","magnons") + call ELEMENTAL_init(2,"E_INDX_IP","Sorted IP Energies indexes","indexes") + call ELEMENTAL_init(3,"E_IP","Sorted IP Energies","sort") + call ELEMENTAL_init(4,"Esort","Sorted Interacting Energies","sort") + call ELEMENTAL_init(5,"pl","Photoluminescence","PL") + call ELEMENTAL_init(6,"eps","Absorption","optics") + call ELEMENTAL_init(7,"refl","Reflectivity","optics") + call ELEMENTAL_init(8,"eel","Electron Energy Loss","optics") + call ELEMENTAL_init(9,"alpha","Polarizability ( -<> )","optics") + call ELEMENTAL_init(10,"off","Off-diagonal Absorption","kerr") + call ELEMENTAL_init(11,"beta","Off-diagonal Polarizability","kerr") + call ELEMENTAL_init(12,"moke","MOKE parameters","kerr") + call ELEMENTAL_init(13,"jdos","Joint Density of states","optics") + call ELEMENTAL_init(14,"CD","Dichroism","dichroism") + call ELEMENTAL_init(15,"Spm","Magnons (S+-)","magnons") + call ELEMENTAL_init(16,"Smp","Magnons (S-p)","magnons") endif ! if (STRING_match(WHAT,"INIT")) then @@ -77,7 +79,8 @@ subroutine X_OUTPUT_driver(WHAT,keys,solver,IQ,IG,DESC,data) if (l_flag) call X_obs_onoff("+ refl") endif if (l_BS_jdos) call X_obs_onoff("+ jdos") - if (l_BS_esort) call X_obs_onoff("+ Esort E_IP") + if (l_BS_esort) call X_obs_onoff("+ Esort E_IP") + if (l_BS_esort_indx) call X_obs_onoff("+ E_INDX_IP") if (l_BS_kerr) then if (X_do_obs("alpha")) call X_obs_onoff("+ beta") if (X_do_obs("eps")) call X_obs_onoff("+ off moke") @@ -98,9 +101,9 @@ subroutine X_OUTPUT_driver(WHAT,keys,solver,IQ,IG,DESC,data) call warning("eps & alpha evaluation switched off for slepc") endif endif - if (STRING_match(solver,"Inversion")) call X_obs_onoff("- Esort E_IP kerr magnons jdos") - if (STRING_match(solver,"Haydock")) call X_obs_onoff("- Esort jdos E_IP") - if (STRING_match(solver,"Diago").or.STRING_match(solver,"Slepc")) call X_obs_onoff("- E_IP") + if (STRING_match(solver,"Inversion")) call X_obs_onoff("- Esort E_IP E_INDX_IP kerr magnons jdos") + if (STRING_match(solver,"Haydock")) call X_obs_onoff("- Esort jdos E_IP E_INDX_IP") + if (STRING_match(solver,"Diago").or.STRING_match(solver,"Slepc")) call X_obs_onoff("- E_IP E_INDX_IP") endif ! ! Headers... @@ -111,14 +114,18 @@ subroutine X_OUTPUT_driver(WHAT,keys,solver,IQ,IG,DESC,data) X_obs(1)%COL_header(1) = 'E' X_obs(1)%COL_header(2:3) = (/'Re(q^2Fxc)','Re(q^2Fxc)'/) endif + if (X_do_obs("indexes")) then + X_obs(2)%N_columns=6 + X_obs(2)%COL_header(1:6) = (/' E ','ikbz','ib1 ','ib2 ','isp1','isp2'/) + endif if (X_do_obs("Esort")) then - do ic=2,3 + do ic=3,4 X_obs(ic)%N_columns=2 X_obs(ic)%COL_header(1:2) = (/'E ','N_deg'/) enddo if (BS_K_coupling) then - X_obs(3)%N_columns=3 - X_obs(3)%COL_header(1:3) = (/'Re(E)','Im(E)','N_deg'/) + X_obs(4)%N_columns=3 + X_obs(4)%COL_header(1:3) = (/'Re(E)','Im(E)','N_deg'/) endif obs_with_res=[character(5) :: "eps","alpha","Spm","Smp","off","beta","CD" ] do it=1,N_X_obs @@ -128,16 +135,16 @@ subroutine X_OUTPUT_driver(WHAT,keys,solver,IQ,IG,DESC,data) l_print_res=l_print_res.or.trim(X_obs(it)%what)==trim(obs_with_res(itp)) enddo if (.not.l_print_res) cycle - do ic=2,3 + do ic=3,4 X_obs(ic)%N_columns=X_obs(ic)%N_columns+1 X_obs(ic)%COL_header(X_obs(ic)%N_columns) =trim(X_obs(it)%what) enddo enddo endif if (X_do_obs("PL")) then - X_obs(4)%N_columns=3 - X_obs(4)%COL_header(1) = 'E' - X_obs(4)%COL_header(2:3) = (/'PL ','PL_o'/) + X_obs(5)%N_columns=3 + X_obs(5)%COL_header(1) = 'E' + X_obs(5)%COL_header(2:3) = (/'PL ','PL_o'/) endif ! ! ... pre-formatted @@ -145,7 +152,7 @@ subroutine X_OUTPUT_driver(WHAT,keys,solver,IQ,IG,DESC,data) BASIC_col(2:3) = (/'Im(XXX)', 'Re(XXX)'/) BASIC_col(4:5) = (/'Im(XXX_o)', 'Re(XXX_o)'/) BASIC_col(6:7) = (/'Im(XXX`)','Re(XXX`)'/) - do it=5,N_X_obs + do it=6,N_X_obs X_obs(it)%N_columns=7 do ic=1,X_obs(it)%N_columns X_obs(it)%COL_header(ic)=STRING_remove(BASIC_col(ic),"XXX",trim(X_obs(it)%what)) @@ -198,6 +205,11 @@ subroutine X_OUTPUT_driver(WHAT,keys,solver,IQ,IG,DESC,data) do ic=2,size(data) call OUTPUT_driver(trim(X_obs(it)%prefix),R_VALUES=data(ic:ic),TITLES=X_obs(it)%COL_header(ic:ic)) enddo + if (present(indexes)) then + do ic=1,size(indexes) + call OUTPUT_driver(trim(X_obs(it)%prefix),I_VALUES=indexes(ic:ic),TITLES=X_obs(it)%COL_header(ic+1:ic+1)) + enddo + endif call OUTPUT_driver(trim(X_obs(it)%prefix),action="write") enddo endif diff --git a/src/parallel/.objects b/src/parallel/.objects index 4d1c143fe4..c901f08ddc 100644 --- a/src/parallel/.objects +++ b/src/parallel/.objects @@ -1,6 +1,6 @@ NL_objs = #if defined _NL -NL_objs = PARALLEL_global_Non_Linear.o +NL_objs = PARALLEL_global_Non_Linear.o #endif #if _RT RT_objs = PARALLEL_global_Real_Time.o @@ -24,7 +24,7 @@ objs = check_for_a_redundant_IO.o \ PARALLEL_global_BZINDX.o \ PARALLEL_global_defaults.o PARALLEL_global_reset.o PARALLEL_global_dimensions.o PARALLEL_global_ScaLapacK.o \ PARALLEL_global_Response_G.o PARALLEL_global_Self_Energy.o PARALLEL_global_Response_T.o PARALLEL_global_Response_T_transitions.o \ - PARALLEL_global_Response_IO.o PARALLEL_global_DIPOLES.o \ + PARALLEL_global_Response_IO.o PARALLEL_global_DIPOLES.o PARALLEL_global_Oscillators.o \ PARALLEL_global_indexes.o \ PARALLEL_index.o PARALLEL_WF_distribute.o \ PARALLEL_Haydock_VEC_COMMs.o PARALLEL_FREQS_setup.o $(RT_objs) $(NL_objs) diff --git a/src/parallel/DOUBLE_project.dep b/src/parallel/DOUBLE_project.dep index 5c51f98854..1e88f54b17 100644 --- a/src/parallel/DOUBLE_project.dep +++ b/src/parallel/DOUBLE_project.dep @@ -19,6 +19,7 @@ PARALLEL_global_BZINDX.o PARALLEL_global_DIPOLES.o PARALLEL_global_Non_Linear.o + PARALLEL_global_Oscillators.o PARALLEL_global_Real_Time.o PARALLEL_global_Response_G.o PARALLEL_global_Response_IO.o diff --git a/src/parallel/PARALLEL_global_Non_Linear.F b/src/parallel/PARALLEL_global_Non_Linear.F index 7b33d6fa8b..e18fd65e70 100644 --- a/src/parallel/PARALLEL_global_Non_Linear.F +++ b/src/parallel/PARALLEL_global_Non_Linear.F @@ -5,7 +5,7 @@ ! ! Authors (see AUTHORS file for details): CA MG DS ! -subroutine PARALLEL_global_Non_Linear(E,Xk,q,ENVIRONMENT) +subroutine PARALLEL_global_Non_Linear(E,Xk,q) ! use electrons, ONLY:levels use R_lattice, ONLY:bz_samp @@ -35,7 +35,6 @@ subroutine PARALLEL_global_Non_Linear(E,Xk,q,ENVIRONMENT) ! type(levels) :: E type(bz_samp) :: Xk,q - character(*) :: ENVIRONMENT ! ! Work space ! @@ -59,7 +58,7 @@ subroutine PARALLEL_global_Non_Linear(E,Xk,q,ENVIRONMENT) call PARALLEL_index(PAR_IND_freqs,(/n_runs/),COMM=PAR_COM_freqs_INDEX) PAR_IND_freqs_ID=PAR_COM_freqs_INDEX%CPU_id PAR_n_freqs =PAR_IND_freqs%n_of_elements(PAR_IND_Q_ibz_ID+1) - call PARALLEL_live_message("Freqs",ENVIRONMENT=ENVIRONMENT, & + call PARALLEL_live_message("Freqs",ENVIRONMENT="Non_Linear", & & LOADED=PAR_IND_freqs%n_of_elements(PAR_IND_freqs_ID+1), & & TOTAL=n_runs,NCPU=PAR_COM_freqs_INDEX%n_CPU) ! @@ -87,7 +86,7 @@ subroutine PARALLEL_global_Non_Linear(E,Xk,q,ENVIRONMENT) ! ! Overlap indeces for WF distribution ! - call PARALLEL_live_message("H(ibz)",ENVIRONMENT=ENVIRONMENT,& + call PARALLEL_live_message("H(ibz)",ENVIRONMENT="Non_Linear",& LOADED=PAR_IND_Xk_ibz%n_of_elements(PAR_IND_Xk_ibz_ID+1),TOTAL=Xk%nibz,& NCPU=PAR_COM_Xk_ibz_INDEX%n_CPU) ! @@ -116,7 +115,7 @@ subroutine PARALLEL_global_Non_Linear(E,Xk,q,ENVIRONMENT) PAR_IND_QP%element_1D(i_qp)=.TRUE. PAR_IND_QP%n_of_elements=PAR_nQP enddo - call PARALLEL_live_message("QPs",ENVIRONMENT=ENVIRONMENT,LOADED=PAR_nQP,TOTAL=QP_n_states) + call PARALLEL_live_message("QPs",ENVIRONMENT="Non_Linear",LOADED=PAR_nQP,TOTAL=QP_n_states) !......................................................................... ! "COLLISIONS" diff --git a/src/parallel/PARALLEL_global_Oscillators.F b/src/parallel/PARALLEL_global_Oscillators.F new file mode 100644 index 0000000000..badc53f43c --- /dev/null +++ b/src/parallel/PARALLEL_global_Oscillators.F @@ -0,0 +1,110 @@ +! +! Copyright (C) 2000-2022 the YAMBO team +! http://www.yambo-code.org +! +! Authors (see AUTHORS file for details): CA MG DS +! +! This file is distributed under the terms of the GNU +! General Public License. You can redistribute it and/or +! modify it under the terms of the GNU General Public +! License as published by the Free Software Foundation; +! either version 2, or (at your option) any later version. +! +! This program is distributed in the hope that it will +! be useful, but WITHOUT ANY WARRANTY; without even the +! implied warranty of MERCHANTABILITY or FITNESS FOR A +! PARTICULAR PURPOSE. See the GNU General Public License +! for more details. +! +! You should have received a copy of the GNU General Public +! License along with this program; if not, write to the Free +! Software Foundation, Inc., 59 Temple Place - Suite 330,Boston, +! MA 02111-1307, USA or visit http://www.gnu.org/copyleft/gpl.txt. +! +subroutine PARALLEL_global_Oscillators(E,Xk,q) + ! + use electrons, ONLY:levels + use R_lattice, ONLY:bz_samp + use collision_ext, ONLY:COLL_bands + use IO_int, ONLY:IO_and_Messaging_switch + use parallel_int, ONLY:PARALLEL_index,PARALLEL_assign_chains_and_COMMs,PARALLEL_live_message + use parallel_m, ONLY:ncpu,COMM_copy,PAR_build_index,PAR_INDEX_copy + ! COMMUNICATORS + use parallel_m, ONLY:PAR_COM_G_b_A2A,PAR_COM_Xk_ibz_INDEX,PAR_COM_WF_k_INDEX,PAR_G_bands_INDEX, & + & PAR_COM_Xk_ibz_A2A,PAR_COM_G_b_INDEX,PAR_n_bands,n_WF_bands_to_load + ! IND + use parallel_m, ONLY:PAR_IND_Xk_ibz,PAR_IND_G_b,PAR_IND_WF_k,PAR_IND_Xk_bz + ! INDEX + use parallel_m, ONLY:PAR_Xk_ibz_index,PAR_Xk_bz_index + ! DIMENSIONS + use parallel_m, ONLY:PAR_nG_bands,PAR_nQP,PAR_Xk_nbz,PAR_Xk_nibz + ! ID's + use parallel_m, ONLY:PAR_IND_G_b_ID,PAR_IND_WF_k_ID,& +& PAR_IND_Xk_bz_ID,PAR_IND_Xk_ibz_ID + use openmp, ONLY:n_threads_OSCLL,OPENMP_set_threads + use QP_m, ONLY:QP_n_states,QP_table + ! +#include + ! + type(levels) :: E + type(bz_samp) :: Xk,q + ! + ! Work space + ! + integer :: nb_mat + ! + CALL PARALLEL_structure(2,(/"k ","b "/)) + ! + call PARALLEL_assign_chains_and_COMMs(2,COMM_index_1=PAR_COM_Xk_ibz_INDEX, & +& COMM_index_2=PAR_COM_G_b_INDEX,& +& COMM_A2A_1=PAR_COM_Xk_ibz_A2A) + ! + ! The routine PARALLEL_assign_chains_and_COMMs cannot define COMMUNICATORS for internal + ! A2A when there is no internal distribution + ! +if (PAR_COM_Xk_ibz_INDEX%n_CPU==1) then + call COMM_copy(PAR_COM_Xk_ibz_A2A,PAR_COM_G_b_A2A) + endif + ! + ! K-points + ! + call PARALLEL_index(PAR_IND_Xk_ibz,(/Xk%nibz/),COMM=PAR_COM_Xk_ibz_INDEX) + PAR_IND_Xk_ibz_ID=PAR_COM_Xk_ibz_INDEX%CPU_id + PAR_Xk_nibz=PAR_IND_Xk_ibz%n_of_elements(PAR_IND_Xk_ibz_ID+1) + call PARALLEL_live_message("K-points",ENVIRONMENT="Oscillators", & +& LOADED=PAR_IND_Xk_ibz%n_of_elements(PAR_IND_Xk_ibz_ID+1), & +& TOTAL=Xk%nibz,NCPU=PAR_COM_Xk_ibz_INDEX%n_CPU) + ! + YAMBO_ALLOC(PAR_Xk_ibz_index,(Xk%nibz)) + call PAR_build_index(PAR_IND_Xk_ibz,Xk%nibz,PAR_Xk_ibz_index,PAR_Xk_nibz) + ! + ! Bands + ! + nb_mat=(COLL_bands(2)-COLL_bands(1)+1)**2 + ! + n_WF_bands_to_load=PAR_n_bands(2) + ! + call PARALLEL_index(PAR_IND_G_b,(/nb_mat/),COMM=PAR_COM_G_b_INDEX) + PAR_IND_G_b_ID=PAR_COM_G_b_INDEX%CPU_id + PAR_nG_bands=PAR_IND_G_b%n_of_elements(PAR_IND_G_b_ID+1) + YAMBO_ALLOC(PAR_G_bands_INDEX,(nb_mat)) + call PAR_build_index(PAR_IND_G_b,nb_mat,PAR_G_bands_index,PAR_nG_bands) + ! + call PARALLEL_live_message("OSCL bands",ENVIRONMENT="Oscillators",& +& LOADED=PAR_IND_G_b%n_of_elements(PAR_IND_G_b_ID+1),& +& TOTAL=nb_mat,NCPU=PAR_COM_G_b_INDEX%n_CPU) + ! + ! Copy to WF + ! + call COMM_copy(PAR_COM_Xk_ibz_INDEX,PAR_COM_WF_k_INDEX) + PAR_IND_WF_k_ID=PAR_COM_WF_k_INDEX%CPU_id + ! + ! Io and messaging + ! + call IO_and_Messaging_switch("+io_out",CONDITION=PAR_COM_G_b_INDEX%CPU_id==0) + ! + call IO_and_Messaging_switch("+output",CONDITION=PAR_COM_G_b_INDEX%CPU_id==0) + ! + call OPENMP_set_threads(n_threads_in=n_threads_OSCLL) + ! +end subroutine PARALLEL_global_Oscillators diff --git a/src/parallel/PARALLEL_global_defaults.F b/src/parallel/PARALLEL_global_defaults.F index e2b9453bf6..3958936fe9 100644 --- a/src/parallel/PARALLEL_global_defaults.F +++ b/src/parallel/PARALLEL_global_defaults.F @@ -71,6 +71,8 @@ subroutine PARALLEL_global_defaults(ENVIRONMENT) call GIMME_a_parallel_proposal(3,(/"q","k","b"/)) case("Non_Linear") call GIMME_a_parallel_proposal(2,(/" k"," w"/)) + case("Oscillators") + call GIMME_a_parallel_proposal(2,(/" k"," b"/)) case("ScaLapacK") CPU_structure(i_PAR_structure)%ROLE(1)="p" CPU_structure(i_PAR_structure)%CPU(1)=ncpu diff --git a/src/parallel/PARALLEL_global_dimensions.F b/src/parallel/PARALLEL_global_dimensions.F index 8f94695e7c..f7d7a00a15 100644 --- a/src/parallel/PARALLEL_global_dimensions.F +++ b/src/parallel/PARALLEL_global_dimensions.F @@ -154,6 +154,11 @@ subroutine PARALLEL_global_dimensions(E,Xk,q,ENVIRONMENT) !----------- if (ENVIRONMENT=="Self_Energy" ) PAR_QP_range=QP_n_states ! + ! Oscillators +#if defined _NL || defined _RT + if (ENVIRONMENT=="Oscillators" ) PAR_n_bands =COLL_bands +#endif + ! #if defined _NL if (ENVIRONMENT=="Non_Linear" ) PAR_n_freqs =n_runs #endif diff --git a/src/parallel/PARALLEL_global_indexes.F b/src/parallel/PARALLEL_global_indexes.F index 7674d792af..a0f597799a 100644 --- a/src/parallel/PARALLEL_global_indexes.F +++ b/src/parallel/PARALLEL_global_indexes.F @@ -9,7 +9,7 @@ subroutine PARALLEL_global_indexes(E,Xk,q,ENVIRONMENT,X,Dip,RESET,Dip_limits_pre ! ! LOGICALS use parallel_m, ONLY:l_par_X_T,l_par_RT,l_par_SE,l_par_X_G,l_par_X_G_finite_q,& -& l_par_DIP,l_par_BZINDX,l_par_NL,l_par_SC +& l_par_DIP,l_par_BZINDX,l_par_NL,l_par_SC,l_par_OSCLL ! COMMUNICATORS use parallel_m, ONLY:PAR_COM_CON_INDEX_X,PAR_COM_VAL_INDEX_X ! IND @@ -81,6 +81,7 @@ subroutine PARALLEL_global_indexes(E,Xk,q,ENVIRONMENT,X,Dip,RESET,Dip_limits_pre l_par_RT =ENVIRONMENT=="Real_Time" l_par_NL =ENVIRONMENT=="Non_Linear" l_par_SC =ENVIRONMENT=="Self_Energy" + l_par_OSCLL =ENVIRONMENT=="Oscillators" ! !================================== ! USER provided PARALLEL structure @@ -148,7 +149,8 @@ subroutine PARALLEL_global_indexes(E,Xk,q,ENVIRONMENT,X,Dip,RESET,Dip_limits_pre #endif ! #if defined _NL - if ( ENVIRONMENT=="Non_Linear") call PARALLEL_global_Non_Linear(E,Xk,q,ENVIRONMENT) + if ( ENVIRONMENT=="Non_Linear") call PARALLEL_global_Non_Linear(E,Xk,q) + if ( ENVIRONMENT=="Oscillators") call PARALLEL_global_Oscillators(E,Xk,q) #endif ! #if defined _SCALAPACK diff --git a/src/setup/PARALLEL_and_IO_Setup.F b/src/setup/PARALLEL_and_IO_Setup.F index 8ab4a2b090..bc8c4931f9 100644 --- a/src/setup/PARALLEL_and_IO_Setup.F +++ b/src/setup/PARALLEL_and_IO_Setup.F @@ -22,7 +22,7 @@ subroutine PARALLEL_and_IO_Setup(en,k) use parallel_int, ONLY:PP_bcast,PP_redux_wait use parser_m, ONLY:parser use wave_func, ONLY:WF_buffered_IO - use openmp, ONLY:n_threads_X,n_threads_SE,n_threads_RT,n_threads_DIP,n_threads_NL,n_threads + use openmp, ONLY:n_threads_X,n_threads_SE,n_threads_RT,n_threads_DIP,n_threads_NL,n_threads,n_threads_OSCLL use memory, ONLY:USER_MEM_limit_string use LIVE_t, ONLY:USER_wall_time_string use cuda_m, ONLY:cuda_visible_devices,have_cuda_devices,cuda_gpu_subscription @@ -84,7 +84,7 @@ subroutine PARALLEL_and_IO_Setup(en,k) ! CPU structure REPORT !====================== ! - n_max_threads=maxval((/n_threads,n_threads_X,n_threads_SE,n_threads_RT,n_threads_DIP,n_threads_NL/)) + n_max_threads=maxval((/n_threads,n_threads_X,n_threads_SE,n_threads_RT,n_threads_DIP,n_threads_NL,n_threads_OSCLL/)) ! if (ncpu>1.or.n_max_threads>1) then ! diff --git a/src/tddft/TDDFT_ALDA_R_space.F b/src/tddft/TDDFT_ALDA_R_space.F deleted file mode 100644 index b1da0c3f50..0000000000 --- a/src/tddft/TDDFT_ALDA_R_space.F +++ /dev/null @@ -1,138 +0,0 @@ -! -! License-Identifier: GPL -! -! Copyright (C) 2015 The Yambo Team -! -! Authors (see AUTHORS file for details): AM -! -function TDDFT_ALDA_R_space(is,os,isp,osp,tddft_wf,mode) - ! - ! Calculates the F_xc scattering - ! - ! mode 1 - ! - ! (ic(1),ik(1),is(1)) --<--:...:--<-- (ic(2),ik(2),is(2)) - ! :Fxc: - ! (iv(1),ik(3),is(3)) -->--:...:-->-- (iv(2),ik(4),is(4)) - ! - ! mode 2 - ! - ! (ic(1),ik(1),is(1)) --<--:...:--<-- (iv(2),ik(2),is(2)) - ! :Fxc: - ! (iv(1),ik(3),is(3)) -->--:...:-->-- (ic(2),ik(4),is(4)) - ! - use pars, ONLY:SP,pi,cZERO - use FFT_m, ONLY:fft_size - use xc_functionals, ONLY:F_xc_mat - use BS, ONLY:l_BS_magnons - use wrapper_omp, ONLY:V_dot_V_omp - use electrons, ONLY:n_spinor,n_sp_pol,n_spin - use timing_m, ONLY:timing - !use interfaces, ONLY:WF_apply_symm - use TDDFT, ONLY:tddft_wf_t - ! - implicit none - ! - complex(SP) :: TDDFT_ALDA_R_space - ! - integer, intent(in) :: is(4),os(4),isp(4),osp(4) - character(3), intent(in) :: mode - type(tddft_wf_t), target, intent(inout) :: tddft_wf - ! - ! Work Space - ! - integer :: i_spinor,j_spinor,ip_spinor,jp_spinor,ifft,irhotw,is_yambo,os_yambo,i_spinor_y,j_spinor_y - ! - complex(SP), pointer :: rhotwr1_p(:) - complex(SP), pointer :: rhotwr2_p(:) - complex(SP), pointer :: WF_symm1_p(:,:) - complex(SP), pointer :: WF_symm2_p(:,:) - ! - call timing('T_space ALDA scatt.',OPR='start') - ! - if ((is(4)/=os(4).or.isp(4)/=osp(4)).and.((.not.l_BS_magnons).or. n_sp_pol==1)) call error(" TDDFT_ALDA_R_space: wrong spin index") - if ((is(4)==os(4).or.isp(4)==osp(4)).and.(( l_BS_magnons).and.n_sp_pol==2)) call error(" TDDFT_ALDA_R_space: wrong spin index") - if (mode/="RES" .and. mode/="CPL" ) call error(" TDDFT_ALDA_R_space: unkown mode") - ! - TDDFT_ALDA_R_space=cZERO - ! - WF_symm1_p => tddft_wf%WF_symm1 - WF_symm2_p => tddft_wf%WF_symm2 - rhotwr1_p => tddft_wf%rhotwr1 - rhotwr2_p => tddft_wf%rhotwr2 - ! - ! Use pointers both for CUDA and to avoid continuous allocation and de-allocation - ! - ! - call WF_apply_symm_cpu(is,WF_symm1_p) - call WF_apply_symm_cpu(os,WF_symm2_p) - ! - if(n_spinor==1) rhotwr1_p(:)=conjg(WF_symm1_p(:,1))*WF_symm2_p(:,1) - ! - if(n_spinor==2) then - do i_spinor=1,n_spinor - do j_spinor=1,n_spinor - do ifft=1,fft_size - irhotw=ifft+(i_spinor-1)*fft_size+(j_spinor-1)*n_spinor*fft_size - rhotwr1_p(irhotw)=conjg(WF_symm1_p(ifft,i_spinor))*WF_symm2_p(ifft,j_spinor) - enddo - enddo - enddo - endif - ! - if (mode=="RES") then - call WF_apply_symm_cpu(isp,WF_symm1_p) - call WF_apply_symm_cpu(osp,WF_symm2_p) - else if (mode=="CPL") then - call WF_apply_symm_cpu(isp,WF_symm2_p) - call WF_apply_symm_cpu(osp,WF_symm1_p) - endif - ! - if( n_spin==1 ) rhotwr2_p(:)=F_xc_mat(:,1,1,1,1)*WF_symm1_p(:,1)*conjg(WF_symm2_p(:,1)) - ! - if(n_sp_pol==2) then - ! This takes into account the different conventions between the definition of the spin indexes - ! between yambo and the derivation of fxc in spin space for magnons - ! This happens because in the yambo convention the left indexes "v sigma_v, c sigma_c, k" are inverted - ! with respect to the standard convention, i.e. "c sigma_c, v sigma_v, k" - ! As a result f_\up\dn,\dn\up is in yambo f_\dn\up,\dn\up and so on - is_yambo=os(4) - os_yambo=is(4) - ! To check/fix the spinorial version - rhotwr2_p(:)=F_xc_mat(:,is_yambo,os_yambo,isp(4),osp(4))*WF_symm1_p(:,1)*conjg(WF_symm2_p(:,1)) - endif - ! - if(n_spinor==2) then - rhotwr2_p=cZERO - do i_spinor=1,n_spinor - do j_spinor=1,n_spinor - i_spinor_y=j_spinor - j_spinor_y=i_spinor - do ip_spinor=1,n_spinor - do jp_spinor=1,n_spinor - do ifft=1,fft_size - irhotw=ifft+(i_spinor-1)*fft_size+(j_spinor-1)*n_spinor*fft_size - rhotwr2_p(irhotw)=rhotwr2_p(irhotw)+ & - & F_xc_mat(ifft,i_spinor_y,j_spinor_y,ip_spinor,jp_spinor)* & - & WF_symm1_p(ifft,ip_spinor)*conjg(WF_symm2_p(ifft,jp_spinor)) - enddo - enddo - enddo - enddo - enddo - endif - ! - ! SUM - !===== - TDDFT_ALDA_R_space=V_dot_V_omp(fft_size*n_spinor*n_spinor,rhotwr1_p,rhotwr2_p) - ! - ! tddft_alda_r_space should be mutiplied by X, it is mutiplied by Co in K - ! X = fft_size*spin_occ/DL_vol/Nq - ! Co = 4*pi*spin_occ/DL_vol/Nq - ! --> X/Co = fft_size/4/pi - ! - TDDFT_ALDA_R_space=TDDFT_ALDA_R_space*real(fft_size,SP)/4._SP/pi - ! - call timing('T_space ALDA scatt.',OPR='stop') - ! -end function diff --git a/src/tddft/TDDFT_ALDA_eh_space_G_collisions_L.F b/src/tddft/TDDFT_ALDA_eh_space_G_collisions_L.F index fe6d5270c3..3ce0e7f513 100644 --- a/src/tddft/TDDFT_ALDA_eh_space_G_collisions_L.F +++ b/src/tddft/TDDFT_ALDA_eh_space_G_collisions_L.F @@ -7,7 +7,7 @@ ! #include ! -subroutine TDDFT_ALDA_eh_space_G_collisions_L(iq,Xk,i_T_grp,tddft_wf,mode) +subroutine TDDFT_ALDA_eh_space_G_collisions_L(iq,Xk,i_T_grp,NG,l_bs_tddft_wf_in_loop,tddft_wf,mode) ! ! Calculates the F_xc scattering ! @@ -16,6 +16,8 @@ subroutine TDDFT_ALDA_eh_space_G_collisions_L(iq,Xk,i_T_grp,tddft_wf,mode) ! use pars, ONLY:SP,DP,pi,cZERO use FFT_m, ONLY:fft_size + use wave_func, ONLY:WF + use interfaces, ONLY:WF_load,WF_free use R_lattice, ONLY:qindx_X,bz_samp,minus_G use BS, ONLY:l_BS_magnons,BS_T_grp,BS_bands use electrons, ONLY:n_spinor,n_sp_pol,n_spin @@ -27,12 +29,14 @@ subroutine TDDFT_ALDA_eh_space_G_collisions_L(iq,Xk,i_T_grp,tddft_wf,mode) implicit none ! type(bz_samp),intent(in) :: Xk - integer, intent(in) :: iq,i_T_grp + integer, intent(in) :: iq,i_T_grp,NG(2) + logical, intent(in) :: l_bs_tddft_wf_in_loop character(1), intent(in) :: mode type(tddft_wf_t), target, intent(inout) :: tddft_wf ! ! Work Space ! + logical :: l_load_WFs integer :: i_spinor,j_spinor,ip_spinor,jp_spinor,ifft,irhotw,is_yambo,os_yambo,i_spinor_y,j_spinor_y integer :: i_T_el,i_T_el_p,N_T_el_p,i_c,i_v,i_sp_c,i_sp_v,i_k_bz,i_k,i_s,i_g0,i_g1,i_g2,i_p_bz,i_g_p,i_p,& & i_sp,i_T_grp_p,is(4),os(4),qs(3),NK(2) @@ -51,8 +55,6 @@ subroutine TDDFT_ALDA_eh_space_G_collisions_L(iq,Xk,i_T_grp,tddft_wf,mode) WF_symm2_p=cZERO rhotwr_p= cZERO ! - ! Use pointers both for CUDA and to avoid continuous allocation and de-allocation - ! #if defined(__NOTNOW) && ! defined(_CUDA) !$omp parallel default(shared), private( K_EXCH_collision, & !$omp & i_T_el,i_k_bz,i_k,i_s, i_p_bz,i_p,i_sp, i_v,i_c,i_sp_c,i_sp_v, & @@ -96,6 +98,20 @@ subroutine TDDFT_ALDA_eh_space_G_collisions_L(iq,Xk,i_T_grp,tddft_wf,mode) os(1)=i_c endif ! + l_load_WFs= l_bs_tddft_wf_in_loop .and. (NK(1)/=min(i_k,i_p).or.NK(2)/=max(i_k,i_p)) + if (l_load_WFs) then +#if defined(__NOTNOW) && ! defined(_CUDA) + !$omp critical +#endif + if (NK(2)/=-1) call WF_free(WF,keep_fft=.true.,keep_states_to_load=.true.) + NK=(/min(i_k,i_p),max(i_k,i_p)/) + call WF_load(WF,NG(1),NG(2),BS_bands,NK,k_extrema_only=.true.,quiet=.true.,& + & space='R',title="Kernel exch",keep_states_to_load=.true.) +#if defined(__NOTNOW) && ! defined(_CUDA) + !$omp end critical +#endif + endif + ! if ( (is(4)/=os(4)) .and. ((.not.l_BS_magnons).or. n_sp_pol==1) ) call error(" TDDFT_ALDA_R_space: wrong spin index") if ( (is(4)==os(4)) .and. (( l_BS_magnons).and.n_sp_pol==2) ) call error(" TDDFT_ALDA_R_space: wrong spin index") ! @@ -113,7 +129,7 @@ subroutine TDDFT_ALDA_eh_space_G_collisions_L(iq,Xk,i_T_grp,tddft_wf,mode) do j_spinor=1,n_spinor ! do ifft=1,fft_size - rhotwr_p(ifft)=cmplx(WF_symm1_p(ifft,i_spinor)*conjg(WF_symm2_p(ifft,j_spinor)),kind=DP) + rhotwr_p(ifft)=cmplx(WF_symm1_p(ifft,i_spinor)*conjg(WF_symm2_p(ifft,j_spinor)),kind=DP) enddo call perform_fft_3d(qs,rhotwr_p,BS_T_grp(i_T_grp)%O_tddft_L(:,i_T_el,i_spinor,j_spinor)) ! @@ -123,6 +139,8 @@ subroutine TDDFT_ALDA_eh_space_G_collisions_L(iq,Xk,i_T_grp,tddft_wf,mode) ! enddo ! + if(l_bs_tddft_wf_in_loop) call WF_free(WF,keep_fft=.true.,keep_states_to_load=.true.) + ! nullify(WF_symm1_p) nullify(WF_symm2_p) nullify(rhotwr_p) @@ -202,31 +220,3 @@ subroutine perform_fft_3d(qs,rhotw,O_x) #endif ! end subroutine perform_fft_3d -! -! -subroutine TDDFT_oscillators_alloc_L(iGL) - use electrons, ONLY:n_spin,n_spinor - use BS, ONLY:BS_T_grp,BS_n_g_fxc -#include - integer, intent(in) :: iGL - if (BS_T_grp(iGL)%size>0) then - YAMBO_ALLOC(BS_T_grp(iGL)%O_tddft_L,(BS_n_g_fxc,BS_T_grp(iGL)%size,n_spinor,n_spinor)) -#ifdef _CUDA - allocate(BS_T_grp(iGL)%O_tddft_L_d,mold=BS_T_grp(iGL)%O_tddft_L) -#endif - endif -end subroutine TDDFT_oscillators_alloc_L -! -! -subroutine TDDFT_oscillators_free_L(iG_ref) - use BS, ONLY:BS_T_grp -#include - integer, intent(in) :: iG_ref - integer :: iGL - do iGL=iG_ref,1,-1 - YAMBO_FREE(BS_T_grp(iGL)%O_tddft_L) -#ifdef _CUDA - if (allocated(BS_T_grp(iGL)%O_tddft_L_d)) deallocate(BS_T_grp(iGL)%O_tddft_L_d) -#endif - enddo -end subroutine TDDFT_oscillators_free_L diff --git a/src/tddft/TDDFT_ALDA_eh_space_G_collisions_R.F b/src/tddft/TDDFT_ALDA_eh_space_G_collisions_R.F index 10a0677f88..0c815df0e5 100644 --- a/src/tddft/TDDFT_ALDA_eh_space_G_collisions_R.F +++ b/src/tddft/TDDFT_ALDA_eh_space_G_collisions_R.F @@ -7,7 +7,7 @@ ! #include ! -subroutine TDDFT_ALDA_eh_space_G_collisions_R(iq,Xk,i_T_grp,tddft_wf,mode) +subroutine TDDFT_ALDA_eh_space_G_collisions_R(iq,Xk,i_T_grp,NG,l_bs_tddft_wf_in_loop,tddft_wf,mode) ! ! Calculates the F_xc scattering ! @@ -16,7 +16,9 @@ subroutine TDDFT_ALDA_eh_space_G_collisions_R(iq,Xk,i_T_grp,tddft_wf,mode) ! use pars, ONLY:SP,DP,pi,cZERO use FFT_m, ONLY:fft_size + use wave_func, ONLY:WF use xc_functionals, ONLY:F_xc_mat + use interfaces, ONLY:WF_load,WF_free use R_lattice, ONLY:qindx_X,bz_samp,minus_G use BS, ONLY:l_BS_magnons,BS_T_grp,BS_bands use electrons, ONLY:n_spinor,n_sp_pol,n_spin @@ -28,20 +30,26 @@ subroutine TDDFT_ALDA_eh_space_G_collisions_R(iq,Xk,i_T_grp,tddft_wf,mode) implicit none ! type(bz_samp),intent(in) :: Xk - integer, intent(in) :: iq,i_T_grp + integer, intent(in) :: iq,i_T_grp,NG(2) + logical, intent(in) :: l_bs_tddft_wf_in_loop character(1), intent(in) :: mode type(tddft_wf_t), target, intent(inout) :: tddft_wf ! ! Work Space ! + logical :: l_load_WFs integer :: i_spinor,j_spinor,ip_spinor,jp_spinor,ifft,irhotw,is_yambo,os_yambo,i_spinor_y,j_spinor_y,& & i_sp_loop,o_sp_loop integer :: i_T_el,i_T_el_p,N_T_el_p,i_c,i_v,i_sp_c,i_sp_v,i_k_bz,i_k,i_s,i_g0,i_g1,i_g2,i_p_bz,i_g_p,i_p,& & i_sp,i_T_grp_p,is(4),os(4),qs(3),NK(2) ! - complex(DP), pointer DEV_ATTR :: rhotwr_p(:) - complex(SP), pointer DEV_ATTR :: WF_symm1_p(:,:) - complex(SP), pointer DEV_ATTR :: WF_symm2_p(:,:) + complex(DP), pointer :: rhotwr_p(:) + complex(SP), pointer :: WF_symm1_p(:,:) + complex(SP), pointer :: WF_symm2_p(:,:) + ! +#if defined _CUDA + call error("TDDFT ALDA Osc. not GPU ported") +#endif ! call timing('T_space ALDA Osc.',OPR='start') ! @@ -100,6 +108,20 @@ subroutine TDDFT_ALDA_eh_space_G_collisions_R(iq,Xk,i_T_grp,tddft_wf,mode) os(1)=i_c endif ! + l_load_WFs= l_bs_tddft_wf_in_loop .and. (NK(1)/=min(i_k,i_p).or.NK(2)/=max(i_k,i_p)) + if (l_load_WFs) then +#if defined(__NOTNOW) && ! defined(_CUDA) + !$omp critical +#endif + if (NK(2)/=-1) call WF_free(WF,keep_fft=.true.,keep_states_to_load=.true.) + NK=(/min(i_k,i_p),max(i_k,i_p)/) + call WF_load(WF,NG(1),NG(2),BS_bands,NK,k_extrema_only=.true.,quiet=.true.,& + & space='R',title="Kernel exch",keep_states_to_load=.true.) +#if defined(__NOTNOW) && ! defined(_CUDA) + !$omp end critical +#endif + endif + ! if ( (is(4)/=os(4)) .and. ((.not.l_BS_magnons).or. n_sp_pol==1) ) call error(" TDDFT_ALDA_R_space: wrong spin index") if ( (is(4)==os(4)) .and. (( l_BS_magnons).and.n_sp_pol==2) ) call error(" TDDFT_ALDA_R_space: wrong spin index") ! @@ -126,7 +148,7 @@ subroutine TDDFT_ALDA_eh_space_G_collisions_R(iq,Xk,i_T_grp,tddft_wf,mode) os_yambo=i_sp_loop ! To check/fix the spinorial version rhotwr_p(:)=cmplx(F_xc_mat(:,is_yambo,os_yambo,is(4),os(4))* & - & WF_symm1_p(:,1)*conjg(WF_symm2_p(:,1)),kind=DP) + & WF_symm1_p(:,1)*conjg(WF_symm2_p(:,1)),kind=DP) call perform_fft_3d(qs,rhotwr_p,BS_T_grp(i_T_grp)%O_tddft_R(:,i_T_el,i_sp_loop,o_sp_loop)) ! enddo @@ -157,6 +179,8 @@ subroutine TDDFT_ALDA_eh_space_G_collisions_R(iq,Xk,i_T_grp,tddft_wf,mode) ! enddo ! + if(l_bs_tddft_wf_in_loop) call WF_free(WF,keep_fft=.true.,keep_states_to_load=.true.) + ! nullify(WF_symm1_p) nullify(WF_symm2_p) nullify(rhotwr_p) @@ -164,31 +188,3 @@ subroutine TDDFT_ALDA_eh_space_G_collisions_R(iq,Xk,i_T_grp,tddft_wf,mode) call timing('T_space ALDA Osc.',OPR='stop') ! end subroutine TDDFT_ALDA_eh_space_G_collisions_R -! -! -subroutine TDDFT_oscillators_alloc_R(iGR) - use electrons, ONLY:n_spin,n_spinor - use BS, ONLY:BS_T_grp,BS_n_g_fxc -#include - integer, intent(in) :: iGR - if (BS_T_grp(iGR)%size>0) then - YAMBO_ALLOC(BS_T_grp(iGR)%O_tddft_R,(BS_n_g_fxc,BS_T_grp(iGR)%size,n_spin,n_spin)) -#ifdef _CUDA - allocate(BS_T_grp(iGR)%O_tddft_R_d,mold=BS_T_grp(iGR)%O_tddft_R) -#endif - endif -end subroutine TDDFT_oscillators_alloc_R -! -! -subroutine TDDFT_oscillators_free_R(iG_ref) - use BS, ONLY:BS_T_grp -#include - integer, intent(in) :: iG_ref - integer :: iGR - do iGR=iG_ref,1,-1 - YAMBO_FREE(BS_T_grp(iGR)%O_tddft_R) -#ifdef _CUDA - if (allocated(BS_T_grp(iGR)%O_tddft_R_d)) deallocate(BS_T_grp(iGR)%O_tddft_R_d) -#endif - enddo -end subroutine TDDFT_oscillators_free_R diff --git a/src/wf_and_fft/WF_alloc.F b/src/wf_and_fft/WF_alloc.F index f20538073a..99c8994d47 100644 --- a/src/wf_and_fft/WF_alloc.F +++ b/src/wf_and_fft/WF_alloc.F @@ -5,7 +5,7 @@ ! ! Authors (see AUTHORS file for details): AM ! -subroutine WF_alloc(WF) +subroutine WF_alloc(WF,k_extrema_only) ! use pars, ONLY:cZERO use wave_func, ONLY:WAVEs,states_to_load,wf_ng,wf_ncx @@ -17,50 +17,57 @@ subroutine WF_alloc(WF) #include ! type(WAVEs) :: WF + logical, intent(in) :: k_extrema_only ! ! Work Space ! - integer :: ik,ib,i_sp_pol,N_total,wf_grid_size + integer :: ik,ib,i_sp_pol,NK,N_loaded,N_total,wf_grid_size ! ! Distributed allocation ! if(allocated(states_to_load)) then ! WF%N=0 + N_loaded=0 N_total=0 do i_sp_pol=WF%sp_pol(1),WF%sp_pol(2) do ik=WF%k(1),WF%k(2) do ib=WF%b(1),WF%b(2) N_total=N_total+1 - if (states_to_load(ib,ik,i_sp_pol)) WF%N=WF%N+1 + if (states_to_load(ib,ik,i_sp_pol)) then + N_loaded=N_loaded+1 + if (.not.k_extrema_only) WF%N=WF%N+1 + if ( k_extrema_only.and.(ik==WF%k(1).or.ik==WF%k(2))) WF%N=WF%N+1 + endif enddo enddo enddo ! else ! - WF%N=(WF%b(2)-WF%b(1)+1)*(WF%k(2)-WF%k(1)+1)*(WF%sp_pol(2)-WF%sp_pol(1)+1) + NK=(WF%k(2)-WF%k(1)+1) + if (k_extrema_only) NK=min(NK,2) + WF%N=(WF%b(2)-WF%b(1)+1)*NK*(WF%sp_pol(2)-WF%sp_pol(1)+1) + N_loaded=WF%N N_total=WF%N ! endif ! + call PARALLEL_live_message("Wave-Function states",LOADED=N_loaded,TOTAL=N_total) + ! if (WF%space=='R') wf_grid_size=fft_size if (WF%space=='G') wf_grid_size=wf_ng if (WF%space=='C') wf_grid_size=wf_ncx if (WF%space=='B') wf_grid_size=wf_ncx ! YAMBO_ALLOC(WF%c,(wf_grid_size,n_spinor,WF%N)) + WF%c=cZERO if (have_cuda) then YAMBO_ALLOC_MOLD(WF%c_d,WF%c) + WF%c_d=cZERO endif ! - call PARALLEL_live_message("Wave-Function states",LOADED=WF%N,TOTAL=N_total) - ! YAMBO_ALLOC(WF%index,(WF%b(2),WF%k(2),WF%sp_pol(2))) - ! - WF%c=cZERO - if (have_cuda) WF%c_d=cZERO - ! WF%index=0 ! end subroutine diff --git a/src/wf_and_fft/WF_free.F b/src/wf_and_fft/WF_free.F index 2f6877cf88..c5c19a0ae3 100644 --- a/src/wf_and_fft/WF_free.F +++ b/src/wf_and_fft/WF_free.F @@ -5,7 +5,7 @@ ! ! Authors (see AUTHORS file for details): AM ! -subroutine WF_free(WF,keep_fft) +subroutine WF_free(WF,keep_fft,keep_states_to_load) ! use wave_func, ONLY:WAVEs use pseudo, ONLY:pp_is_uspp,PP_uspp_free @@ -15,14 +15,19 @@ subroutine WF_free(WF,keep_fft) ! type(WAVEs) :: WF logical, optional :: keep_fft + logical, optional :: keep_states_to_load ! ! Work-space ! logical :: keep_fft_ + logical :: keep_states_to_load_ ! keep_fft_=.false. if (present(keep_fft)) keep_fft_=keep_fft ! + keep_states_to_load_=.false. + if (present(keep_states_to_load)) keep_states_to_load_=keep_states_to_load + ! ! USPP if (pp_is_uspp) call PP_uspp_free() ! @@ -44,6 +49,9 @@ subroutine WF_free(WF,keep_fft) WF%b=0 WF%sp_pol=0 WF%space=' ' + ! + if (keep_states_to_load_) return + ! WF%to_load=.TRUE. ! end subroutine diff --git a/src/wf_and_fft/WF_load.F b/src/wf_and_fft/WF_load.F index 0f42af3ce4..52e954fb15 100644 --- a/src/wf_and_fft/WF_load.F +++ b/src/wf_and_fft/WF_load.F @@ -11,6 +11,7 @@ !! @param[in] iGo_max_in ??? !! @param[in] bands_to_load bands range !! @param[in] kpts_to_load k-points range +!! @param[in] k_extrema_only k-points, load only !first and last in range !! @param[in] sp_pol_to_load spin-polarization range !! @param[in] space R = real-space, G = G-space, C = ?, B = ? !! @param[in] title purpose of the wave-function loading @@ -21,12 +22,12 @@ !! !! @param[out] WF Wave-function object ! -subroutine WF_load(WF,iG_in,iGo_max_in,bands_to_load,kpts_to_load,sp_pol_to_load,& +subroutine WF_load(WF,iG_in,iGo_max_in,bands_to_load,kpts_to_load,sp_pol_to_load,k_extrema_only,& & space,title,impose_free_and_alloc,force_WFo,keep_states_to_load,quiet) ! ! Load and (eventually) FFTs the wavefunctions ! - use pars, ONLY:SP,DP,schlen + use pars, ONLY:SP,DP,schlen,cZERO use com, ONLY:msg,core_io_path,fat_log use vec_operate, ONLY:c2a use stderr, ONLY:intc @@ -58,6 +59,7 @@ subroutine WF_load(WF,iG_in,iGo_max_in,bands_to_load,kpts_to_load,sp_pol_to_load integer, optional :: sp_pol_to_load(2) character(*),optional :: space character(*),optional :: title + logical, optional :: k_extrema_only logical, optional :: impose_free_and_alloc logical, optional :: force_WFo logical, optional :: keep_states_to_load @@ -69,16 +71,18 @@ subroutine WF_load(WF,iG_in,iGo_max_in,bands_to_load,kpts_to_load,sp_pol_to_load character(schlen)::wf_title integer ::i_sp_pol,ikibz,ifrag,ib_grp,ib_to_load,ib,ib1,ib2,i_spinor,ic,ig,igfft,& & iG_max,iG_bounds_loaded(2),iGo_max,sp_pol_to_load_(2),wf_grid_size,& -& npwk +& npwk,N real(SP) ::mndp,mxdp,xk(3) complex(SP) ::c - logical ::loaded_bounds_ok,free_and_alloc,buffer_is_ok,clean_up_states,force_WFo_,QUIET_wf,SIZE_msg + logical ::loaded_bounds_ok,free_and_alloc,buffer_is_ok,clean_up_states,& + k_extrema_only_,force_WFo_,QUIET_wf,SIZE_msg complex(SP), allocatable :: wf_disk(:,:,:) complex(DP), allocatable :: wf_DP(:) ! complex(DP), allocatable DEV_ATTR :: wf_DP_d(:) complex(SP), allocatable DEV_ATTR :: wf_disk_d(:,:,:) - complex(SP), pointer DEV_ATTR :: wfc_d(:,:,:) + complex(SP), pointer DEV_ATTR :: wfc_d(:,:,:) + ! integer :: WFN ! ! I/O @@ -104,6 +108,9 @@ subroutine WF_load(WF,iG_in,iGo_max_in,bands_to_load,kpts_to_load,sp_pol_to_load force_WFo_=.false. if(present(force_WFo)) force_WFo_=force_WFo ! + k_extrema_only_=.false. + if(present(k_extrema_only)) k_extrema_only_=k_extrema_only + ! sp_pol_to_load_=(/1,n_sp_pol/) if(present(sp_pol_to_load)) sp_pol_to_load_=sp_pol_to_load ! @@ -182,6 +189,7 @@ subroutine WF_load(WF,iG_in,iGo_max_in,bands_to_load,kpts_to_load,sp_pol_to_load ! QUIET_wf = .false. if (kpts_to_load(1)==kpts_to_load(2).and.kpts_to_load(2)>1) QUIET_wf = .true. + if (k_extrema_only_.and.kpts_to_load(1)>1) QUIET_wf = .true. if (present(quiet)) QUIET_wf = quiet if (QUIET_wf) call IO_and_Messaging_switch("-report -log") SIZE_msg=.not.QUIET_wf.and.(any((/fft_dim_loaded/=fft_dim/)).or.any((/fft_dim==0/))) @@ -255,7 +263,7 @@ subroutine WF_load(WF,iG_in,iGo_max_in,bands_to_load,kpts_to_load,sp_pol_to_load ! ! Memory allocation ! - call WF_alloc(WF) + call WF_alloc(WF,k_extrema_only_) ! if (WF%space=='R') wf_grid_size=fft_size if (WF%space=='G') wf_grid_size=wf_ng @@ -304,6 +312,7 @@ subroutine WF_load(WF,iG_in,iGo_max_in,bands_to_load,kpts_to_load,sp_pol_to_load do ikibz=1,nkibz ! if (any( (/ikibzWF%k(2)/) )) cycle + if (k_extrema_only_ .and. all( (/ikibz>WF%k(1), ikibzmxdp) mxdp=abs(c) if (abs(c)