From 2243a000160f9898cb51197a071709337a0593ae Mon Sep 17 00:00:00 2001 From: Davide Sangalli Date: Sat, 15 Jun 2024 23:00:35 +0200 Subject: [PATCH] Version 5.2.0, Revision 23240, Hash 89334754e0 MODIFIED * configure include/version/version.m4 bse/K_blocks.F bse/K_kernel.F interface/INIT_load.F io_parallel/io_BS_PAR_block.F io_parallel/io_BS_PAR_init.F modules/mod_BS.F Changes: - [yambo] For runs where the BSE matrix is only constructed, e.g. no solver, the BS_blk(iB)%mat are allocated and de-allocated within the loops over blocks. This saves a lot of memory if used together with the new I/O mode BSKIOmode="write_all_steps" Patch sent by: Davide Sangalli --- configure | 22 ++++++------ include/version/version.m4 | 6 ++-- src/bse/K_blocks.F | 60 +++++++++++++++++++++++-------- src/bse/K_kernel.F | 40 ++++++++++++++------- src/interface/INIT_load.F | 2 +- src/io_parallel/io_BS_PAR_block.F | 24 +++++++++---- src/io_parallel/io_BS_PAR_init.F | 7 ++-- src/modules/mod_BS.F | 1 + 8 files changed, 111 insertions(+), 51 deletions(-) diff --git a/configure b/configure index efc5015e76..894e41728b 100755 --- a/configure +++ b/configure @@ -1,6 +1,6 @@ #! /bin/sh # Guess values for system-dependent variables and create Makefiles. -# Generated by GNU Autoconf 2.71 for Yambo 5.2.0 r.23180 h.e1699b65ed. +# Generated by GNU Autoconf 2.71 for Yambo 5.2.0 r.23240 h.89334754e0. # # Report bugs to . # @@ -610,8 +610,8 @@ MAKEFLAGS= # Identity of this package. PACKAGE_NAME='Yambo' PACKAGE_TARNAME='yambo' -PACKAGE_VERSION='5.2.0 r.23180 h.e1699b65ed' -PACKAGE_STRING='Yambo 5.2.0 r.23180 h.e1699b65ed' +PACKAGE_VERSION='5.2.0 r.23240 h.89334754e0' +PACKAGE_STRING='Yambo 5.2.0 r.23240 h.89334754e0' PACKAGE_BUGREPORT='yambo@yambo-code.org' PACKAGE_URL='' @@ -1600,7 +1600,7 @@ if test "$ac_init_help" = "long"; then # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. 
cat <<_ACEOF -\`configure' configures Yambo 5.2.0 r.23180 h.e1699b65ed to adapt to many kinds of systems. +\`configure' configures Yambo 5.2.0 r.23240 h.89334754e0 to adapt to many kinds of systems. Usage: $0 [OPTION]... [VAR=VALUE]... @@ -1666,7 +1666,7 @@ fi if test -n "$ac_init_help"; then case $ac_init_help in - short | recursive ) echo "Configuration of Yambo 5.2.0 r.23180 h.e1699b65ed:";; + short | recursive ) echo "Configuration of Yambo 5.2.0 r.23240 h.89334754e0:";; esac cat <<\_ACEOF @@ -1876,7 +1876,7 @@ fi test -n "$ac_init_help" && exit $ac_status if $ac_init_version; then cat <<\_ACEOF -Yambo configure 5.2.0 r.23180 h.e1699b65ed +Yambo configure 5.2.0 r.23240 h.89334754e0 generated by GNU Autoconf 2.71 Copyright (C) 2021 Free Software Foundation, Inc. @@ -2505,7 +2505,7 @@ cat >config.log <<_ACEOF This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. -It was created by Yambo $as_me 5.2.0 r.23180 h.e1699b65ed, which was +It was created by Yambo $as_me 5.2.0 r.23240 h.89334754e0, which was generated by GNU Autoconf 2.71. Invocation command line was $ $0$ac_configure_args_raw @@ -3263,8 +3263,8 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu SVERSION="5" SSUBVERSION="2" SPATCHLEVEL="0" -SREVISION="23180" -SHASH="e1699b65ed" +SREVISION="23240" +SHASH="89334754e0" @@ -16578,7 +16578,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 # report actual input values of CONFIG_FILES etc. instead of their # values after options handling. ac_log=" -This file was extended by Yambo $as_me 5.2.0 r.23180 h.e1699b65ed, which was +This file was extended by Yambo $as_me 5.2.0 r.23240 h.89334754e0, which was generated by GNU Autoconf 2.71. 
Invocation command line was CONFIG_FILES = $CONFIG_FILES @@ -16642,7 +16642,7 @@ ac_cs_config_escaped=`printf "%s\n" "$ac_cs_config" | sed "s/^ //; s/'/'\\\\\\\\ cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 ac_cs_config='$ac_cs_config_escaped' ac_cs_version="\\ -Yambo config.status 5.2.0 r.23180 h.e1699b65ed +Yambo config.status 5.2.0 r.23240 h.89334754e0 configured by $0, generated by GNU Autoconf 2.71, with options \\"\$ac_cs_config\\" diff --git a/include/version/version.m4 b/include/version/version.m4 index d464301e5f..41e5835d6a 100644 --- a/include/version/version.m4 +++ b/include/version/version.m4 @@ -1,9 +1,9 @@ -AC_INIT(Yambo, 5.2.0 r.23180 h.e1699b65ed, yambo@yambo-code.org) +AC_INIT(Yambo, 5.2.0 r.23240 h.89334754e0, yambo@yambo-code.org) SVERSION="5" SSUBVERSION="2" SPATCHLEVEL="0" -SREVISION="23180" -SHASH="e1699b65ed" +SREVISION="23240" +SHASH="89334754e0" AC_SUBST(SVERSION) AC_SUBST(SSUBVERSION) AC_SUBST(SPATCHLEVEL) diff --git a/src/bse/K_blocks.F b/src/bse/K_blocks.F index 85b38a22e8..c70f570e7e 100644 --- a/src/bse/K_blocks.F +++ b/src/bse/K_blocks.F @@ -19,7 +19,7 @@ subroutine K_blocks( ) ! complex(SP), allocatable :: mat(:,:) !end type BS_block ! - use pars, ONLY:cZERO,SP,IP,IPL + use pars, ONLY:SP,IP,IPL use parallel_m, ONLY:PAR_IND_T_ordered,myid,ncpu,PAR_COM_HOST use parallel_int, ONLY:PP_wait,PP_redux_wait,PARALLEL_live_message use BS, ONLY:BS_blk,n_BS_blks,n_BS_blks,BS_K_coupling,BS_T_grp,BS_nT_grps,& @@ -128,20 +128,6 @@ subroutine K_blocks( ) ! Allocate the blocks ! YAMBO_PAR_ALLOC_CHECK1(BS_MAT,(/TMP_SIZE/)) - ! 
- do iB=1,n_BS_blks - YAMBO_ALLOC(BS_blk(iB)%mat,(BS_blk(iB)%size(1),BS_blk(iB)%size(2))) - allocate(BS_blk(iB)%done(BS_blk(iB)%size(1),BS_blk(iB)%size(2))) - BS_blk(iB)%mat=cZERO - BS_blk(iB)%done(:,:)="f" - if (BS_blk(iB)%coordinate(1)==BS_blk(iB)%coordinate(2)) then - diag_size=minval(BS_blk(iB)%size(:)) - allocate(BS_blk(iB)%table(5,diag_size)) - allocate(BS_blk(iB)%E(diag_size)) - BS_blk(iB)%E=-1._SP - BS_blk(iB)%table=0 - endif -enddo ! ! Live Timing Blocks !==================== @@ -170,3 +156,47 @@ subroutine K_blocks( ) call PARALLEL_live_message("Kernel matrix elements",ENVIRONMENT="Response_T_space",LOADED_r=N_Ts_local,TOTAL_r=N_Ts_total) ! end subroutine +! +! +subroutine K_block_alloc( iB ) + ! + use pars, ONLY:SP,cZERO + use BS, ONLY:BS_blk + ! +#include + ! + integer, intent(in) :: iB + ! + integer :: diag_size + ! + YAMBO_ALLOC(BS_blk(iB)%mat,(BS_blk(iB)%size(1),BS_blk(iB)%size(2))) + allocate(BS_blk(iB)%done(BS_blk(iB)%size(1),BS_blk(iB)%size(2))) + BS_blk(iB)%mat=cZERO + BS_blk(iB)%done(:,:)="f" + if (BS_blk(iB)%coordinate(1)==BS_blk(iB)%coordinate(2)) then + diag_size=minval(BS_blk(iB)%size(:)) + allocate(BS_blk(iB)%table(5,diag_size)) + allocate(BS_blk(iB)%E(diag_size)) + BS_blk(iB)%E=-1._SP + BS_blk(iB)%table=0 + endif + ! +end subroutine K_block_alloc +! +! +subroutine K_block_free( iB ) + ! + use BS, ONLY:BS_blk + ! +#include + ! + integer, intent(in) :: iB + ! + YAMBO_FREE(BS_blk(iB)%mat) + deallocate(BS_blk(iB)%done) + if (BS_blk(iB)%coordinate(1)==BS_blk(iB)%coordinate(2)) then + deallocate(BS_blk(iB)%table) + deallocate(BS_blk(iB)%E) + endif + ! +end subroutine K_block_free diff --git a/src/bse/K_kernel.F b/src/bse/K_kernel.F index 09e082c441..1d81345074 100644 --- a/src/bse/K_kernel.F +++ b/src/bse/K_kernel.F @@ -10,6 +10,7 @@ subroutine K_kernel(iq,Ken,Xk,q,X,Xw,W_bss) ! K = <2V-W> for n_spin=1; K= for n_spin>1 ! 
use pars, ONLY:SP,schlen,pi,cZERO,cI + use drivers, ONLY:l_bss use LOGO, ONLY:pickup_a_random use drivers, ONLY:l_bs_fxc,l_tddft,l_rim_w use frequency, ONLY:w_samp @@ -31,7 +32,7 @@ subroutine K_kernel(iq,Ken,Xk,q,X,Xw,W_bss) & O_ng,BS_n_g_exch,BS_n_g_fxc,BS_identifier,BS_LiveTiming_steps,& & BS_K_dim,BS_K_is_ALDA,BS_cpl_K_exchange,& & BS_cpl_K_corr,K_EXCH_collision,K_CORR_collision,& -& WF_phase,n_BS_blks,BS_blk,BS_T_grp,& +& WF_phase,n_BS_blks,BS_blk,BS_T_grp,BSK_IO_mode,BSK_IO_sum_value,& & BS_nT_grps,BS_blks_free,l_BS_ares_from_res,& & l_BSE_minimize_memory,l_BSE_restart,l_BSE_kernel_complete,& & BS_perturbative_SOC,BS_K_cutoff,BS_max_val,l_BS_magnons @@ -89,8 +90,8 @@ subroutine K_kernel(iq,Ken,Xk,q,X,Xw,W_bss) complex(SP), external ::TDDFT_ALDA_eh_space_G_kernel complex(SP), external ::K_exchange_kernel_resonant,K_exchange_kernel_coupling ! - logical :: l_bs_exch,l_bs_corr,l_bs_exch_wf_in_loop,l_bs_tddft_wf_in_loop,l_matrix_init,& - & l_load_kernel,l_write_kernel,l_skip_phases,l_std_alg,l_dir_alg,l_tddft_gsum,l_tddft_rsum + logical :: l_bs_exch,l_bs_corr,l_bs_exch_wf_in_loop,l_bs_tddft_wf_in_loop,l_matrix_init,BS_blk_todo,& + & l_load_kernel,l_write_kernel,l_write_kernel_step,l_skip_phases,l_std_alg,l_dir_alg,l_tddft_gsum,l_tddft_rsum ! ! I/O ! @@ -289,6 +290,9 @@ subroutine K_kernel(iq,Ken,Xk,q,X,Xw,W_bss) l_matrix_init=.true. l_load_kernel=.true. l_write_kernel=.false. + l_write_kernel_step= index(BSK_IO_mode,"write_all_steps")/=0 + ! + if (l_write_kernel_step) call warning("Experimental mode, IO at every step. Restart not possible") ! BS_blk_done=.false. ! @@ -368,6 +372,8 @@ subroutine K_kernel(iq,Ken,Xk,q,X,Xw,W_bss) if (m_steps>0) call live_timing("Loading partial kernel",m_steps) ! do i_block=1,n_BS_blks + ! + if (.not. allocated(BS_blk(i_block)%mat) ) call K_block_alloc(i_block) ! ! Read BS_blk to check if the transition was already computed ! @@ -385,17 +391,17 @@ subroutine K_kernel(iq,Ken,Xk,q,X,Xw,W_bss) ! check if this block need to be done ! 
skip this block if already done ! - BS_blk_done(i_block)=.true. - skip_check: do i_Tp=1,BS_T_grp(i_Tgrp_p)%size - do i_Tk=1,BS_T_grp(i_Tgrp_k)%size - if (BS_blk(i_block)%done(i_Tk,i_Tp)=="t") cycle - BS_blk_done(i_block)=.false. - exit skip_check - enddo - enddo skip_check + BS_blk_done(i_block)=all((/BS_blk(i_block)%done=="t"/)) + BS_blk_todo =all((/BS_blk(i_block)%done=="f"/)) ! if (BS_blk_done(i_block)) m_steps=m_steps-i_steps ! + if (.not. (l_bss.or.l_partial_kernel_loaded) ) then + ! + if (BS_blk_done(i_block) .or. BS_blk_todo) call K_block_free(i_block) + ! + endif + ! call live_timing(steps=i_steps) ! enddo @@ -457,11 +463,20 @@ subroutine K_kernel(iq,Ken,Xk,q,X,Xw,W_bss) if(l_partial_kernel_loaded.and.l_write_kernel) then call io_control(ACTION=WR,ID=ID) call io_BS_PAR_block(iq,i_block,ID,"full") + if (.not.l_bss) call K_block_free(i_block) endif #endif cycle endif ! + if (l_write_kernel_step) then + if (iHxc==1) BSK_IO_sum_value=.false. + if (iHxc==2) BSK_IO_sum_value=l_bs_exch + if (iHxc==3) BSK_IO_sum_value=l_bs_exch.or.BS_K_is_ALDA + endif + ! + if (.not. allocated(BS_blk(i_block)%mat) ) call K_block_alloc(i_block) + ! ! Read BS_blk to check if the transition was already computed ! if (BS_LiveTiming_steps>= 0) i_steps=BS_Block_size(i_block) @@ -787,7 +802,7 @@ subroutine K_kernel(iq,Ken,Xk,q,X,Xw,W_bss) ! if(iHxc==3) call BS_correlation_oscillators_free(i_block,l_std_alg) ! -100 if(l_write_kernel) then +100 if(l_write_kernel.or.l_write_kernel_step) then #if defined _PAR_IO call io_control(ACTION=WR,ID=ID) call io_BS_PAR_block(iq,i_block,ID,"full") @@ -795,6 +810,7 @@ subroutine K_kernel(iq,Ken,Xk,q,X,Xw,W_bss) call io_control(ACTION=WR,COM=REP,SEC=(/i_block+1/),ID=ID) io_BS_err=io_BS(iq,X,ID) #endif + if (.not.l_bss) call K_block_free(i_block) endif ! 
call live_timing(steps=i_steps) diff --git a/src/interface/INIT_load.F b/src/interface/INIT_load.F index fd1f595448..90f46b8442 100644 --- a/src/interface/INIT_load.F +++ b/src/interface/INIT_load.F @@ -292,7 +292,7 @@ subroutine INIT_load(defs,en,q,k,X,Xw,Dip) call it(defs,'FxcMode', '[TDDFT] ("G-XXX" or "R-XXX" with XXX=def/full/cut_Gmax/cut_GmGp") ',FXC_mode,verb_level=V_resp) call it(defs,'BSEEhEny','[BSK] Electron-hole energy range',BS_eh_en,E_unit,verb_level=V_resp) call it(defs,'BSKCut', '[BSK] Cutoff on the BSE Kernel, 0=full 1=none',BS_K_cutoff,verb_level=V_resp) - call it(defs,'BSKIOmode','[BSK] ("1D_linear"/"2D_standard" + "norestart")',BSK_IO_mode,verb_level=V_resp) + call it(defs,'BSKIOmode','[BSK] ("1D_linear"/"2D_standard" + "norestart" + "write_all_steps")',BSK_IO_mode,verb_level=V_resp) call it(defs,'BSKmod', '[BSE] IP/Hartree/HF/ALDA/SEX/BSfxc',BSK_mode,protect=.FALSE.,case="A") call it(defs,'Gauge' , '[BSE/X] Gauge (length|velocity)',global_gauge,verb_level=V_resp) call it('f',defs,'NoCondSumRule' ,'[BSE/X] Do not impose the conductivity sum rule in velocity gauge',verb_level=V_resp) diff --git a/src/io_parallel/io_BS_PAR_block.F b/src/io_parallel/io_BS_PAR_block.F index 79131b309f..a55b2bf7bd 100644 --- a/src/io_parallel/io_BS_PAR_block.F +++ b/src/io_parallel/io_BS_PAR_block.F @@ -15,10 +15,10 @@ subroutine io_BS_PAR_block(iq,i_block,ID,mode) ! use pars, ONLY:IP,IPL,SP,schlen,max_io_vars_open use iso_c_binding, ONLY:C_LOC,C_F_POINTER - use stderr, ONLY:STRING_remove - use BS, ONLY:BS_blk,BSK_IO_mode,MAX_BSK_LIN_size,n_BS_blks_min,BS_K_dim + use stderr, ONLY:STRING_split + use BS, ONLY:BS_blk,BSK_IO_mode,MAX_BSK_LIN_size,n_BS_blks_min,BS_K_dim,BSK_IO_sum_value use timing_m, ONLY:timing - use IO_m, ONLY:io_unit,netcdf_call,nf90_sync,variable_exist,& + use IO_m, ONLY:io_unit,netcdf_call,nf90_sync,variable_exist,io_action,RD,WR,& & write_is_on,read_is_on,io_BS_K,YAMBO_CMPLX_NO_FILL use IO_int, ONLY:io_variable_bulk ! 
@@ -32,18 +32,20 @@ subroutine io_BS_PAR_block(iq,i_block,ID,mode) ! integer, parameter :: n_max_tot_vars=6 ! see io_BS_PAR_init ! - character(schlen) :: BSK_IO_local + character(schlen) :: BSK_IO_local,BSK_IO_split(3) logical :: lwrite(3),file_sync integer :: x1,x2,VAR_IDs(2),ID_table,ID_energ,stat,ID_now(2) integer :: n2,i1,i2,idx1,idx2,imax,i_rep(2),ilast integer(IPL) :: idx ! - complex(SP), allocatable :: TMP_IO_VAR(:) + complex(SP), allocatable :: TMP_IO_VAR(:),mat_tmp(:,:) ! if( (.not.io_BS_K) .and. trim(mode)=="full") return ! call timing('io_BS',OPR='start') ! + if (.not. allocated(BS_blk(i_block)%mat) ) call K_block_alloc(i_block) + ! ! Get the type of block that we want to write ! ID_table=max_io_vars_open @@ -61,7 +63,8 @@ subroutine io_BS_PAR_block(iq,i_block,ID,mode) x1=BS_blk(i_block)%coordinate(1) x2=BS_blk(i_block)%coordinate(2) ! - BSK_IO_local=trim(STRING_remove(BSK_IO_mode,"norestart")) + call STRING_split(BSK_IO_mode,BSK_IO_split) + BSK_IO_local=BSK_IO_split(1) if(index(mode,"compressed")/=0) BSK_IO_local=trim(mode) if(index(mode,"tableonly")/=0) BSK_IO_local=trim(mode) ! @@ -177,6 +180,15 @@ subroutine io_BS_PAR_block(iq,i_block,ID,mode) ! ! Write one block at a time in 2D ! + if (BSK_IO_sum_value.and.write_is_on(ID)) then + allocate(mat_tmp(BS_blk(i_block)%size(1),BS_blk(i_block)%size(2))) + io_action(ID)=RD + call io_variable_bulk(ID, VAR_IDs(1), C2=mat_tmp , IPOS=(/1,x1,x2/) ) + io_action(ID)=WR + BS_blk(i_block)%mat=BS_blk(i_block)%mat+mat_tmp + deallocate(mat_tmp) + endif + ! call io_variable_bulk(ID, VAR_IDs(1), C2=BS_blk(i_block)%mat , IPOS=(/1,x1,x2/) ) if(read_is_on(ID) .and. .not.trim(mode)=="matonly") then do i2=1,BS_blk(i_block)%size(2) diff --git a/src/io_parallel/io_BS_PAR_init.F b/src/io_parallel/io_BS_PAR_init.F index c28959b971..70b759fdf9 100644 --- a/src/io_parallel/io_BS_PAR_init.F +++ b/src/io_parallel/io_BS_PAR_init.F @@ -7,7 +7,7 @@ ! integer function io_BS_PAR_init(iq,ID,mode) ! 
- use stderr, ONLY:intc,real2ch,STRING_remove + use stderr, ONLY:intc,real2ch,STRING_split use com, ONLY:msg use pars, ONLY:IP,IPL,SP,IP_YIO,SP_YIO,schlen,lchlen,max_io_vars_open use BS, ONLY:BS_K_dim,BS_H_dim,BS_IO_dim,BS_K_coupling,l_BSE_kernel_complete,& @@ -27,7 +27,7 @@ integer function io_BS_PAR_init(iq,ID,mode) integer, parameter :: n_kind_vars=2 ! mat, done integer, parameter :: n_max_tot_vars=6 ! maximum number is n_max_vars times 2 kinds ! - character(schlen) :: db_name,ch_tmp,dim_names(3,n_max_vars),BSK_IO_local + character(schlen) :: db_name,ch_tmp,dim_names(3,n_max_vars),BSK_IO_local,BSK_IO_split(3) character(lchlen) :: ch(n_max_vars,max_io_vars_open-1) logical :: def_var, io_var integer(IPL) :: BS_lin_size @@ -86,7 +86,8 @@ integer function io_BS_PAR_init(iq,ID,mode) if ( BS_K_coupling ) n_vars=2 if (.not.l_BS_ares_from_res) n_vars=n_vars+1 ! - BSK_IO_local=trim(STRING_remove(BSK_IO_mode,"norestart")) + call STRING_split(BSK_IO_mode,BSK_IO_split) + BSK_IO_local=BSK_IO_split(1) if(index(mode,"compressed")/=0 ) BSK_IO_local=trim(mode) ! select case(trim(BSK_IO_local)) diff --git a/src/modules/mod_BS.F b/src/modules/mod_BS.F index 2812f48647..a7803bc907 100644 --- a/src/modules/mod_BS.F +++ b/src/modules/mod_BS.F @@ -28,6 +28,7 @@ module BS logical :: BS_not_const_eh_f=.FALSE. logical :: BS_perturbative_SOC=.FALSE. logical :: BS_K_has_been_calculated_loaded=.FALSE. + logical :: BSK_IO_sum_value character(schlen) :: BSE_L_kind character(schlen) :: BSE_mode character(schlen) :: BSE_prop