diff --git a/configure b/configure
index 7be7959ee9..efc5015e76 100755
--- a/configure
+++ b/configure
@@ -1,6 +1,6 @@
 #! /bin/sh
 # Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.71 for Yambo 5.2.0 r.23155 h.849241804d.
+# Generated by GNU Autoconf 2.71 for Yambo 5.2.0 r.23180 h.e1699b65ed.
 #
 # Report bugs to <yambo@yambo-code.org>.
 #
@@ -610,8 +610,8 @@ MAKEFLAGS=
 # Identity of this package.
 PACKAGE_NAME='Yambo'
 PACKAGE_TARNAME='yambo'
-PACKAGE_VERSION='5.2.0 r.23155 h.849241804d'
-PACKAGE_STRING='Yambo 5.2.0 r.23155 h.849241804d'
+PACKAGE_VERSION='5.2.0 r.23180 h.e1699b65ed'
+PACKAGE_STRING='Yambo 5.2.0 r.23180 h.e1699b65ed'
 PACKAGE_BUGREPORT='yambo@yambo-code.org'
 PACKAGE_URL=''
 
@@ -772,8 +772,6 @@ def_scalapack
 enable_scalapack
 SCALAPACK_LIBS
 BLACS_LIBS
-with_petsc_branch
-with_slepc_branch
 internal_slepc
 internal_petsc
 compile_slepc
@@ -990,8 +988,6 @@ with_petsc_incs
 with_petsc_path
 with_petsc_libdir
 with_petsc_includedir
-with_slepc_branch
-with_petsc_branch
 enable_par_linalg
 with_blacs_libs
 with_scalapack_libs
@@ -1604,7 +1600,7 @@ if test "$ac_init_help" = "long"; then
   # Omit some internal or obsolete options to make the list less imposing.
   # This message is too long to be a string in the A/UX 3.1 sh.
   cat <<_ACEOF
-\`configure' configures Yambo 5.2.0 r.23155 h.849241804d to adapt to many kinds of systems.
+\`configure' configures Yambo 5.2.0 r.23180 h.e1699b65ed to adapt to many kinds of systems.
 
 Usage: $0 [OPTION]... [VAR=VALUE]...
 
@@ -1670,7 +1666,7 @@ fi
 
 if test -n "$ac_init_help"; then
   case $ac_init_help in
-     short | recursive ) echo "Configuration of Yambo 5.2.0 r.23155 h.849241804d:";;
+     short | recursive ) echo "Configuration of Yambo 5.2.0 r.23180 h.e1699b65ed:";;
    esac
   cat <<\_ACEOF
 
@@ -1755,8 +1751,6 @@ Optional Packages:
   --with-petsc-libdir=<path>    Path to the Petsc lib directory
   --with-petsc-includedir=<path>
                                 Path to the Petsc include directory
-  --with-slepc-branch=<branch>  Use the <branch> of the slepc repository.
-  --with-petsc-branch=<branch>  Use the <branch> of the petsc repository.
   --with-blacs-libs=(libs|mkl)  Use BLACS libraries <libs> or setup MKL
   --with-scalapack-libs=(libs|mkl)
                                 Use SCALAPACK libraries <libs> or setup MKL
@@ -1882,7 +1876,7 @@ fi
 test -n "$ac_init_help" && exit $ac_status
 if $ac_init_version; then
   cat <<\_ACEOF
-Yambo configure 5.2.0 r.23155 h.849241804d
+Yambo configure 5.2.0 r.23180 h.e1699b65ed
 generated by GNU Autoconf 2.71
 
 Copyright (C) 2021 Free Software Foundation, Inc.
@@ -2511,7 +2505,7 @@ cat >config.log <<_ACEOF
 This file contains any messages produced by compilers while
 running configure, to aid debugging if configure makes a mistake.
 
-It was created by Yambo $as_me 5.2.0 r.23155 h.849241804d, which was
+It was created by Yambo $as_me 5.2.0 r.23180 h.e1699b65ed, which was
 generated by GNU Autoconf 2.71.  Invocation command line was
 
   $ $0$ac_configure_args_raw
@@ -3269,8 +3263,8 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu
 SVERSION="5"
 SSUBVERSION="2"
 SPATCHLEVEL="0"
-SREVISION="23155"
-SHASH="849241804d"
+SREVISION="23180"
+SHASH="e1699b65ed"
 
 
 
@@ -12743,24 +12737,6 @@ then :
 fi
 
 
-
-# Check whether --with-slepc-branch was given.
-if test ${with_slepc_branch+y}
-then :
-  withval=$with_slepc_branch;
-else $as_nop
-  with_slepc_branch=none
-fi
-
-
-# Check whether --with-petsc-branch was given.
-if test ${with_petsc_branch+y}
-then :
-  withval=$with_petsc_branch;
-else $as_nop
-  with_petsc_branch=none
-fi
-
 #
 def_slepc=""
 petsc="no"
@@ -13153,8 +13129,6 @@ fi
 
 
 
-
-
 #
 
 # ============================================================================
@@ -15436,7 +15410,7 @@ if test x"$enable_nvtx" = "x";  then enable_nvtx="no" ; fi
 #
 def_cuda=""
 CUDA_FLAGS=""
-CUDA_LIBS="-cudalib=cufft,cublas,cusolver"
+CUDA_LIBS="-Mcudalib=cufft,cublas,cusolver"
 
 # Available cc options:
 #    cc20            Compile for compute capability 2.0
@@ -15458,12 +15432,12 @@ CUDA_LIBS="-cudalib=cufft,cublas,cusolver"
 printf %s "checking for CUDA support... " >&6; }
 if test x"$enable_cuda" = "xyes" ; then
   def_cuda="-D_CUDA"
-  CUDA_FLAGS="-cuda -gpu=cuda9.0,cc70,nollvm $CUDA_LIBS"
+  CUDA_FLAGS="-Mcuda=cuda9.0,cc70,nollvm $CUDA_LIBS"
   { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $CUDA_FLAGS" >&5
 printf "%s\n" "$CUDA_FLAGS" >&6; }
 elif ! test x"$enable_cuda" = "x" ; then
   def_cuda="-D_CUDA"
-  CUDA_FLAGS="-cuda -gpu=$enable_cuda $CUDA_LIBS"
+  CUDA_FLAGS="-Mcuda=$enable_cuda $CUDA_LIBS"
   { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $CUDA_FLAGS" >&5
 printf "%s\n" "$CUDA_FLAGS" >&6; }
 fi
@@ -15607,7 +15581,6 @@ PETSC_check="-"
 if test "$internal_petsc" = "yes" ; then
   if test "$compile_petsc" = "yes" ; then PETSC_check="C"; fi
   if test "$compile_petsc" = "no"  ; then PETSC_check="I"; fi
-  if ! test "$with_petsc_branch" = "none"; then PETSC_LIBS="$PETSC_LIBS (git branch $with_petsc_branch)"; fi
 elif test "$enable_petsc" = "yes" ; then
   PETSC_check="E"
 fi
@@ -15616,7 +15589,6 @@ SLEPC_check="-"
 if test "$internal_slepc" = "yes" ; then
   if test "$compile_slepc" = "yes" ; then SLEPC_check="C"; fi
   if test "$compile_slepc" = "no"  ; then SLEPC_check="I"; fi
-  if ! test "$with_slepc_branch" = "none"; then SLEPC_LIBS="$SLEPC_LIBS (git branch $with_slepc_branch)"; fi
 elif test "$enable_slepc" = "yes" ; then
   SLEPC_check="E"
 fi
@@ -16606,7 +16578,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
 # report actual input values of CONFIG_FILES etc. instead of their
 # values after options handling.
 ac_log="
-This file was extended by Yambo $as_me 5.2.0 r.23155 h.849241804d, which was
+This file was extended by Yambo $as_me 5.2.0 r.23180 h.e1699b65ed, which was
 generated by GNU Autoconf 2.71.  Invocation command line was
 
   CONFIG_FILES    = $CONFIG_FILES
@@ -16670,7 +16642,7 @@ ac_cs_config_escaped=`printf "%s\n" "$ac_cs_config" | sed "s/^ //; s/'/'\\\\\\\\
 cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
 ac_cs_config='$ac_cs_config_escaped'
 ac_cs_version="\\
-Yambo config.status 5.2.0 r.23155 h.849241804d
+Yambo config.status 5.2.0 r.23180 h.e1699b65ed
 configured by $0, generated by GNU Autoconf 2.71,
   with options \\"\$ac_cs_config\\"
 
diff --git a/driver/yambo.F b/driver/yambo.F
index 46740be97f..f49585ef41 100644
--- a/driver/yambo.F
+++ b/driver/yambo.F
@@ -242,10 +242,10 @@ integer function yambo(np,pid,lnstr,iinf,iind,iod,icd,ijs,instr,inf,ind,od,com_d
  !
 #if defined _NL
  !
- ! RT
+ ! NL
  !====
  !
- if (l_nl_optics) call NL_driver(en,X(:2),k,q,Dip)
+ if (l_nl_optics) call NL_driver(en,X(:2),Xw(:2),k,q,Dip)
  if (l_nl_optics) call mem_manager_report
  !
 #endif
diff --git a/include/version/version.m4 b/include/version/version.m4
index 59a11fbbbf..d464301e5f 100644
--- a/include/version/version.m4
+++ b/include/version/version.m4
@@ -1,9 +1,9 @@
-AC_INIT(Yambo, 5.2.0 r.23155 h.849241804d, yambo@yambo-code.org)
+AC_INIT(Yambo, 5.2.0 r.23180 h.e1699b65ed, yambo@yambo-code.org)
 SVERSION="5"
 SSUBVERSION="2"
 SPATCHLEVEL="0"
-SREVISION="23155"
-SHASH="849241804d"
+SREVISION="23180"
+SHASH="e1699b65ed"
 AC_SUBST(SVERSION)
 AC_SUBST(SSUBVERSION)
 AC_SUBST(SPATCHLEVEL)
diff --git a/lib/archive/package.list b/lib/archive/package.list
index 8ed6c766f5..6cd99e58fe 100644
--- a/lib/archive/package.list
+++ b/lib/archive/package.list
@@ -34,8 +34,8 @@ version_libxc=5.2.3
 version_lapack=3.12.0
 version_blacs=missing
 version_scalapack=2.2.1
-version_slepc=3.20.2
-version_petsc=3.20.5
+version_slepc=3.21.0
+version_petsc=3.21.0
 version_fftw=3.3.10
 version_fftqe=missing
 version_yaml=0.2.2
diff --git a/src/Yio/io_control.F b/src/Yio/io_control.F
index 47b8bd5b60..f9875066ed 100644
--- a/src/Yio/io_control.F
+++ b/src/Yio/io_control.F
@@ -24,6 +24,7 @@ subroutine io_control(ACTION,MODE,COM,SEC,ID,COMM,DO_IT)
 &                        RD,WR,RD_CL,WR_CL,OP_APP,OP_APP_CL,OP_WR,OP_RD,OP_WR_CL,OP_RD_CL,&
 &                        io_reset,io_PAR_comm,io_PAR_cpu,IO_read,IO_write,&
 &                        write_is_on
+ use stderr,        ONLY:intc
  !
  implicit none
  !
@@ -41,14 +42,14 @@ subroutine io_control(ACTION,MODE,COM,SEC,ID,COMM,DO_IT)
  if ( .not. any( (/ ACTION==RD_CL, ACTION==OP_RD_CL, ACTION==OP_WR_CL,   &
 &                   ACTION==OP_APP_CL, ACTION==OP_RD, ACTION==OP_APP,    &
 &                   ACTION==OP_WR, ACTION==RD, ACTION==WR,ACTION==WR_CL/) ) ) &
-&  call error("io_control called with a non proper action")
+&  call error("io_control called with a non proper action :"//intc(ACTION))
  !
  ! Assign a new ID if the unit is not already open
  !
  define_ID_WR=any((/ACTION==OP_WR_CL, ACTION==OP_WR, ACTION==OP_APP_CL, ACTION==OP_APP/))
  define_ID_RD=any((/ACTION==OP_RD_CL, ACTION==OP_RD/))
  !
- ! Except for the cases where the ID is defined, io_control is a trivial subrtouine
+ ! Except for the cases where the ID is defined, io_control is a trivial subrtouine (this is a joke)
  !
  if (.not. (define_ID_RD .or. define_ID_WR) ) then
    call set_action_mode_com_and_sec()
diff --git a/src/allocations/X_ALLOC_elemental.F b/src/allocations/X_ALLOC_elemental.F
index 51652e624f..d54301c2e7 100644
--- a/src/allocations/X_ALLOC_elemental.F
+++ b/src/allocations/X_ALLOC_elemental.F
@@ -8,7 +8,8 @@
 subroutine X_ALLOC_elemental(WHAT,DIM)
  !
  use pars,          ONLY:rZERO,cZERO
- use X_m,           ONLY:Resp_ii,Resp_ij,Epsilon_ii,Joint_DOS,BS_E_sorted,Epsilon_ij,X_magnons,X_dichroism,&
+ use X_m,           ONLY:Resp_ii,Resp_ij,Epsilon_ii,Joint_DOS,BS_E_sorted,BS_E_sorted_indx, &
+&                        Epsilon_ij,X_magnons,X_dichroism,                  &
 &                        X_fxc,EEL,Alpha,X_mat,X_drude_term,X_RLcomp_ig
  use PHOTOLUM,      ONLY:PL
 #ifdef _CUDA
@@ -58,6 +59,8 @@ subroutine X_ALLOC_elemental(WHAT,DIM)
    case('BS_E_SORTED')
      YAMBO_ALLOC(BS_E_sorted,(DIM(1),DIM(2),DIM(3)))
      BS_E_sorted=cZERO
+     YAMBO_ALLOC(BS_E_sorted_indx,(DIM(1),2*DIM(2),5))
+     BS_E_sorted_indx=0
    case('FXC')
      YAMBO_ALLOC(X_fxc,(DIM(1)))
      X_fxc=cZERO
@@ -103,6 +106,7 @@ subroutine X_ALLOC_elemental(WHAT,DIM)
  endif
  if (STRING_match(WHAT,"BS_E_SORTED").or.STRING_match(WHAT,"ALL")) then
    YAMBO_FREE(BS_E_sorted)
+   YAMBO_FREE(BS_E_sorted_indx)
  endif
  if (STRING_match(WHAT,"FXC").or.STRING_match(WHAT,"ALL")) then
    YAMBO_FREE(X_fxc)
diff --git a/src/bse/.objects b/src/bse/.objects
index a054e18bf0..a2f9ee2ab2 100644
--- a/src/bse/.objects
+++ b/src/bse/.objects
@@ -10,15 +10,15 @@ SLK_objects = K_inversion_do_it_SLK.o
 #endif
 objs =	K_blocks.o K_driver_init.o K_dipoles.o K_IP.o K_IP_sort.o \
         K_Transitions_setup.o K_Transition_check.o \
-        K_driver.o \
+        K_driver.o BSE_utilities.o \
         K_WF_phases.o \
-        K.o $(PARIO_objects) K_correlation_collisions_dir.o K_correlation_collisions_std.o K_exchange_collisions.o \
+        K_kernel.o $(PARIO_objects) K_correlation_collisions_dir.o K_correlation_collisions_std.o K_exchange_collisions.o \
         K_correlation_kernel_dir.o K_correlation_kernel_std.o K_exchange_kernel.o \
         K_restart.o K_solvers.o K_Haydock.o K_Haydock_response.o K_Haydock_gather.o K_Haydock_scatter.o K_screened_interaction.o \
         K_inversion_do_it_full.o EPS_via_perturbative_inversion.o \
         K_inversion_driver.o K_diagonal.o K_inversion_Lo.o  K_inversion_restart.o K_inversion_engine.o \
-        K_diago_driver.o K_diago_non_hermitian_residuals.o K_diago_hermitian_residuals.o \
+        K_diago_driver.o K_diago_left_residuals.o K_diago_right_residuals.o K_diago_overlap_matrix.o \
         K_diago_perturbative.o K_diago_response_functions.o K_eps_interpolate.o \
         K_observables.o K_multiply_by_V.o K_multiply_by_V_transpose.o K_dot_product.o K_components_folded_in_serial_arrays.o \
-        K_stored_in_a_big_matrix.o K_diago_kerr_residual.o K_diago_magnons_residual.o  \
+        K_stored_in_a_big_matrix.o \
         PL_diago_residual.o PL_via_perturbative_inversion.o $(SLEPC_objects) $(SLK_objects)
diff --git a/src/bse/BSE_utilities.F b/src/bse/BSE_utilities.F
new file mode 100644
index 0000000000..f38dbbd2c0
--- /dev/null
+++ b/src/bse/BSE_utilities.F
@@ -0,0 +1,120 @@
+!
+! License-Identifier: GPL
+!
+! Copyright (C) 2023 The Yambo Team
+!
+! Authors (see AUTHORS file for details): AM MG DS
+!
+subroutine BS_exchange_oscillators_alloc(iG)
+ use BS,        ONLY:BS_T_grp,BS_n_g_exch
+#include<memory.h>
+ integer, intent(in) :: iG
+ if (BS_T_grp(iG)%size==0) return
+ YAMBO_ALLOC(BS_T_grp(iG)%O_x,(BS_n_g_exch,BS_T_grp(iG)%size))
+#ifdef _CUDA
+ allocate(BS_T_grp(iG)%O_x_d,mold=BS_T_grp(iG)%O_x)
+#endif
+end subroutine BS_exchange_oscillators_alloc
+!
+subroutine BS_correlation_oscillators_alloc(iB)
+ use BS,        ONLY:BS_blk,O_ng
+#include<memory.h>
+ integer, intent(in) :: iB
+ if (BS_blk(iB)%N_oscillators==0) return
+ YAMBO_ALLOC(BS_blk(iB)%O_c,(O_ng,BS_blk(iB)%N_oscillators))
+#ifdef _CUDA
+ allocate(BS_blk(iB)%O_c_d,mold=BS_blk(iB)%O_c)
+#endif
+end subroutine BS_correlation_oscillators_alloc
+!
+subroutine BS_exchange_oscillators_free(iG_ref)
+ use BS,        ONLY:BS_T_grp
+#include<memory.h>
+ integer, intent(in) :: iG_ref
+ integer :: iG
+ do iG=iG_ref,1,-1
+   YAMBO_FREE(BS_T_grp(iG)%O_x)
+#ifdef _CUDA
+   if (allocated(BS_T_grp(iG)%O_x_d)) deallocate(BS_T_grp(iG)%O_x_d)
+#endif
+ enddo
+end subroutine BS_exchange_oscillators_free
+!
+subroutine BS_correlation_oscillators_free(iB_ref,l_std_alg)
+ use BS,        ONLY:BS_T_grp,l_BSE_minimize_memory,BS_blk,n_BS_blks
+#include<memory.h>
+ integer, intent(in) :: iB_ref
+ logical, intent(in) :: l_std_alg
+ integer :: iB,ik_loop,ip_loop,ik_now,ip_now
+ ik_now=BS_blk(iB_ref)%ik
+ ip_now=BS_blk(iB_ref)%ip
+ if(iB_ref==n_BS_blks) then
+   ik_now=0
+   ip_now=0
+ endif
+ do iB=iB_ref,1,-1
+   ik_loop=BS_blk(iB)%ik
+   ip_loop=BS_blk(iB)%ip
+   if ( ik_now==ik_loop .and. ip_now==ip_loop .and. &
+   &   .not.(l_BSE_minimize_memory.or.l_std_alg)) cycle
+   if (.not.allocated(BS_blk(iB)%O_c)) exit
+   YAMBO_FREE(BS_blk(iB)%O_c)
+   YAMBO_FREE(BS_blk(iB)%O_table)
+   YAMBO_FREE(BS_blk(iB)%kp_table)
+#ifdef _CUDA
+   if (allocated(BS_blk(iB)%O_c_d)) deallocate(BS_blk(iB)%O_c_d)
+#endif
+ enddo
+end subroutine BS_correlation_oscillators_free
+!
+subroutine TDDFT_oscillators_alloc_L(iGL)
+ use electrons, ONLY:n_spin,n_spinor
+ use BS,        ONLY:BS_T_grp,BS_n_g_fxc
+#include<memory.h>
+ integer, intent(in) :: iGL
+ if (BS_T_grp(iGL)%size>0) then
+   YAMBO_ALLOC(BS_T_grp(iGL)%O_tddft_L,(BS_n_g_fxc,BS_T_grp(iGL)%size,n_spinor,n_spinor))
+#ifdef _CUDA
+   allocate(BS_T_grp(iGL)%O_tddft_L_d,mold=BS_T_grp(iGL)%O_tddft_L)
+#endif
+ endif
+end subroutine TDDFT_oscillators_alloc_L
+!
+subroutine TDDFT_oscillators_alloc_R(iGR)
+ use electrons, ONLY:n_spin,n_spinor
+ use BS,        ONLY:BS_T_grp,BS_n_g_fxc
+#include<memory.h>
+ integer, intent(in) :: iGR
+ if (BS_T_grp(iGR)%size>0) then
+   YAMBO_ALLOC(BS_T_grp(iGR)%O_tddft_R,(BS_n_g_fxc,BS_T_grp(iGR)%size,n_spin,n_spin))
+#ifdef _CUDA
+   allocate(BS_T_grp(iGR)%O_tddft_R_d,mold=BS_T_grp(iGR)%O_tddft_R)
+#endif
+ endif
+end subroutine TDDFT_oscillators_alloc_R
+!
+subroutine TDDFT_oscillators_free_L(iG_ref)
+ use BS,        ONLY:BS_T_grp
+#include<memory.h>
+ integer, intent(in) :: iG_ref
+ integer :: iGL
+ do iGL=iG_ref,1,-1
+   YAMBO_FREE(BS_T_grp(iGL)%O_tddft_L)
+#ifdef _CUDA
+   if (allocated(BS_T_grp(iGL)%O_tddft_L_d)) deallocate(BS_T_grp(iGL)%O_tddft_L_d)
+#endif
+ enddo
+end subroutine TDDFT_oscillators_free_L
+!
+subroutine TDDFT_oscillators_free_R(iG_ref)
+ use BS,        ONLY:BS_T_grp
+#include<memory.h>
+ integer, intent(in) :: iG_ref
+ integer :: iGR
+ do iGR=iG_ref,1,-1
+   YAMBO_FREE(BS_T_grp(iGR)%O_tddft_R)
+#ifdef _CUDA
+   if (allocated(BS_T_grp(iGR)%O_tddft_R_d)) deallocate(BS_T_grp(iGR)%O_tddft_R_d)
+#endif
+ enddo
+end subroutine TDDFT_oscillators_free_R
diff --git a/src/bse/DOUBLE_project.dep b/src/bse/DOUBLE_project.dep
index bb0a01a38a..5bfc14f7b2 100644
--- a/src/bse/DOUBLE_project.dep
+++ b/src/bse/DOUBLE_project.dep
@@ -1,5 +1,5 @@
+ BSE_utilities.o
  EPS_via_perturbative_inversion.o
- K.o
  K_Haydock.o
  K_Haydock_gather.o
  K_Haydock_response.o
@@ -19,10 +19,13 @@
  K_diago_driver.o
  K_diago_hermitian_residuals.o
  K_diago_kerr_residual.o
+ K_diago_left_residuals.o
  K_diago_magnons_residual.o
  K_diago_non_hermitian_residuals.o
+ K_diago_overlap_matrix.o
  K_diago_perturbative.o
  K_diago_response_functions.o
+ K_diago_right_residuals.o
  K_diagonal.o
  K_dipoles.o
  K_dot_product.o
@@ -37,6 +40,7 @@
  K_inversion_driver.o
  K_inversion_engine.o
  K_inversion_restart.o
+ K_kernel.o
  K_multiply_by_V.o
  K_multiply_by_V_slepc.o
  K_multiply_by_V_transpose.o
diff --git a/src/bse/K_Haydock.F b/src/bse/K_Haydock.F
index f9520f4e87..d715448a5f 100644
--- a/src/bse/K_Haydock.F
+++ b/src/bse/K_Haydock.F
@@ -9,6 +9,7 @@ subroutine K_Haydock(iq,W,OBS,Xk)
  !
  use pars,           ONLY:SP,schlen,cI,cZERO,cONE
  use functions,      ONLY:NAN
+ use R_lattice,      ONLY:q0_def_norm
  use X_m,            ONLY:global_gauge,X_OUTPUT_driver
  use wrapper_omp,    ONLY:V_plus_alpha_V_omp
  use frequency,      ONLY:w_samp
@@ -228,6 +229,7 @@ subroutine K_Haydock(iq,W,OBS,Xk)
    do i_ColFG=1,BS_nkFGbz_in_Tgrp(i_g)
      if (trim(OBS)=="optics")      V0(i_g)%fragment(:,i_ColFG) = BS_T_grp(i_g)%dipoles_opt(1,:,1)
      if (trim(OBS)=="kerr")        V0(i_g)%fragment(:,i_ColFG) = BS_T_grp(i_g)%dipoles_opt(2,:,1)
+     if (trim(OBS)=="dichroism")   V0(i_g)%fragment(:,i_ColFG) = BS_T_grp(i_g)%dipoles_dic(1,:,1)
      if (trim(OBS)=="magnons")     V0(i_g)%fragment(:,i_ColFG) = BS_T_grp(i_g)%dipoles_mag(1,:,1)
      V0(i_g)%fragment(:,i_ColFG) = V0(i_g)%fragment(:,i_ColFG)*sqrt(cmplx(BS_T_grp(i_g)%f_RES,kind=SP))
    enddo
@@ -266,9 +268,10 @@ subroutine K_Haydock(iq,W,OBS,Xk)
      if (.not.PAR_IND_T_groups%element_1D(i_g)) cycle
      do i_ColFG=1,BS_nkFGbz_in_Tgrp(i_g)
        if (trim(OBS)=="kerr") W0(i_g)%fragment(:,i_ColFG) = BS_T_grp(i_g)%dipoles_opt(1,:,1)
+       if (trim(OBS)=="dichroism") W0(i_g)%fragment(:,i_ColFG) = BS_T_grp(i_g)%dipoles_opt(1,:,1)/q0_def_norm
        W0(i_g)%fragment(:,i_ColFG) = W0(i_g)%fragment(:,i_ColFG)*(sqrt(cmplx(BS_T_grp(i_g)%f_RES,kind=SP)))
      enddo
-       if(velocity_correction) V_tmp(i_g)%fragment = W0(i_g)%fragment/BS_T_grp(i_g)%E
+     if(velocity_correction) V_tmp(i_g)%fragment = W0(i_g)%fragment/BS_T_grp(i_g)%E
    enddo
    !
    do i_g = 1,BS_nT_grps
diff --git a/src/bse/K_Haydock_response.F b/src/bse/K_Haydock_response.F
index 03e540fbd9..8d59e1c6a0 100644
--- a/src/bse/K_Haydock_response.F
+++ b/src/bse/K_Haydock_response.F
@@ -7,9 +7,10 @@
 !
 subroutine K_Haydock_response(iq,it,Cf_size,OBS,scheme,W,Af,Bf,Cf,reached_threshold,FG_factor)
  !
- use pars,           ONLY:SP,cZERO,cONE,pi
+ use pars,           ONLY:SP,cZERO,cONE,cI,pi
+ use units,          ONLY:SPEED_OF_LIGHT
  use X_m,            ONLY:Resp_ii,Joint_DOS,global_gauge, &
-&                         X_magnons,Resp_ij,i_G_shift
+&                         X_magnons,Resp_ij,X_dichroism,i_G_shift
  use R_lattice,      ONLY:bare_qpg
  use frequency,      ONLY:w_samp
  use BS,             ONLY:BSE_mode
@@ -223,6 +224,8 @@ subroutine K_Haydock_response(iq,it,Cf_size,OBS,scheme,W,Af,Bf,Cf,reached_thresh
      Resp_ij(iw,i_resp(:))= -X_o(1:2,1)*Co
    case("magnons")
      X_magnons(iw,1,i_resp(:))=-X_o(1:2,1)*Co/(4._SP*pi)
+   case("dichroism")
+     X_dichroism(iw,i_resp(:))=-X_o(1:2,1)*Co/(4._SP*pi)*(cI/SPEED_OF_LIGHT)
    end select
    !
  enddo
@@ -242,6 +245,9 @@ subroutine K_Haydock_response(iq,it,Cf_size,OBS,scheme,W,Af,Bf,Cf,reached_thresh
    case("magnons")
      sums=(X_magnons(iw,1,2)+X_magnons(iw,1,4))/2._SP
      diff= X_magnons(iw,1,2)-X_magnons(iw,1,4)
+   case("dichroism")
+     sums=(X_dichroism(iw,2)+X_dichroism(iw,4))/2._SP
+     diff= X_dichroism(iw,2)-X_dichroism(iw,4)
    end select
    !
    update_thresh = abs(real(sums))>0._SP .and. abs(aimag(sums))>0._SP
diff --git a/src/bse/K_IP.F b/src/bse/K_IP.F
index e7ce35f566..1430298f37 100644
--- a/src/bse/K_IP.F
+++ b/src/bse/K_IP.F
@@ -21,8 +21,8 @@ subroutine K_IP(iq,Ken,Xk,Dip,W_bss)
 &                        skip_cond_sum_rule,X_magnons,Resp_ij,X_dichroism
  use BS_solvers,    ONLY:Co_factor,diam_term_exact,para_term_w0,BSS_Wd,BSS_mode,BSS_uses_DbGd,FG_factor
  use BS,            ONLY:BS_T_grp,BS_nkFGbz_in_Tgrp,BS_nT_grps,BS_perturbative_SOC,&
- &                       BS_dip_size,l_BS_jdos,l_BS_esort,l_BS_trace,BS_H_dim,&
- &                       l_BS_abs,l_BS_kerr,l_BS_magnons,l_BS_dichroism,l_BS_photolum,&
+&                        BS_dip_size,l_BS_jdos,l_BS_esort,l_BS_trace,BS_H_dim,l_BS_kerr_asymm,&
+&                        l_BS_abs,l_BS_kerr,l_BS_magnons,l_BS_dichroism,l_BS_photolum,&
 &                        BS_K_anti_resonant,l_BS_ares_from_res
  use parallel_int,  ONLY:PP_redux_wait
  use parallel_m,    ONLY:PAR_BS_nT_col_grps,PAR_COM_eh_INDEX,PAR_IND_T_groups,&
@@ -43,7 +43,7 @@ subroutine K_IP(iq,Ken,Xk,Dip,W_bss)
  !
  ! Work space
  !
- integer           ::iv,ic,i_T_g,i_T,n_SOC,i_pert_SOC,i_res_ares,i_sort,nVAR,i_dip
+ integer           ::i_T_g,i_T,n_SOC,i_pert_SOC,i_res_ares,i_sort,nVAR,Indexes(5)
  real(SP)          ::f_eh,f_eh_PL,res_PL
  logical           ::dip_v,dip_r
  complex(SP)       ::res_abs,E_plus_W0,E_plus_W(2/n_sp_pol),Z_eh,E_eh,g_fac
@@ -52,6 +52,7 @@ subroutine K_IP(iq,Ken,Xk,Dip,W_bss)
  complex(SP), allocatable :: E_DbGd(:)
  !
  complex(SP)       ::P_weighted(3),res_kerr,res_magn(2),res_dich
+ integer,        allocatable :: vtmpI(:,:)
  complex(SP),    allocatable :: vtmpC(:,:)
  !
  if (l_bs_fxc) return
@@ -97,13 +98,15 @@ subroutine K_IP(iq,Ken,Xk,Dip,W_bss)
  ! Sorting pre-setup
  !-------------------
  if(l_BS_esort) then
-   N_BS_E_sorted=2 ! Energies (IP + degeneracy)
+   N_BS_E_sorted=2    ! Energies (IP + degeneracy)
    if(l_BS_abs)       n_BS_E_sorted=n_BS_E_sorted+1
    if(l_BS_kerr)      n_BS_E_sorted=n_BS_E_sorted+1
    if(l_BS_dichroism) n_BS_E_sorted=n_BS_E_sorted+1
    if(l_BS_magnons)   n_BS_E_sorted=n_BS_E_sorted+n_spinor
    YAMBO_ALLOC(vtmpC,(BS_H_dim,n_BS_E_sorted))
    vtmpC=cZERO
+   YAMBO_ALLOC(vtmpI,(BS_H_dim,5))
+   vtmpI=0
  endif
  !
  if (BSS_uses_DbGd .and. (index(BSS_mode,'h')/=0)) then
@@ -128,6 +131,7 @@ subroutine K_IP(iq,Ken,Xk,Dip,W_bss)
      ! Energy
      !========
      !
+     Indexes=BS_T_grp(i_T_g)%table(i_T,:)
      E_plus_W0=BS_T_grp(i_T_g)%E(i_T,1)
      if(allocated(BS_T_grp(i_T_g)%W)) then
        E_plus_W0=BS_T_grp(i_T_g)%E(i_T,1)-cI*BS_T_grp(i_T_g)%W(i_T)
@@ -147,7 +151,7 @@ subroutine K_IP(iq,Ken,Xk,Dip,W_bss)
      enddo
      !
      if (l_BS_esort.and.i_res_ares==1) then
-       call K_IP_sort("add",i_sort,nVAR,E_plus_W0,vtmpC)
+       call K_IP_sort("add",i_sort,nVAR,Indexes,vtmpI,E_plus_W0,vtmpC)
        nVAR=nVAR+1
      endif
      !
@@ -163,21 +167,28 @@ subroutine K_IP(iq,Ken,Xk,Dip,W_bss)
      ! Residuals DIP_cv * DIP_vc
      !
      if(l_BS_abs) then
-       res_abs=abs(BS_T_grp(i_T_g)%dipoles_opt(1,i_T,1))**2
+       res_abs=BS_T_grp(i_T_g)%dipoles_opt(1,i_T,1)*conjg(BS_T_grp(i_T_g)%dipoles_opt(1,i_T,1))
+       if(l_BS_kerr_asymm) then
+         res_abs=(BS_T_grp(i_T_g)%dipoles_opt(1,i_T,1)*conjg(BS_T_grp(i_T_g)%dipoles_opt(1,i_T,1))+     &
+         &        BS_T_grp(i_T_g)%dipoles_opt(2,i_T,1)*conjg(BS_T_grp(i_T_g)%dipoles_opt(2,i_T,1)))/2._SP
+       endif
        if(l_BS_trace) then
-         do i_dip=2,BS_dip_size
-           res_abs=res_abs+abs(BS_T_grp(i_T_g)%dipoles_opt(i_dip,i_T,1))**2
-         enddo
-         res_abs=res_abs/real(BS_dip_size,SP)
+         res_abs=(BS_T_grp(i_T_g)%dipoles_opt(1,i_T,1)*conjg(BS_T_grp(i_T_g)%dipoles_opt(1,i_T,1))+     &
+         &        BS_T_grp(i_T_g)%dipoles_opt(2,i_T,1)*conjg(BS_T_grp(i_T_g)%dipoles_opt(2,i_T,1))+     &
+         &        BS_T_grp(i_T_g)%dipoles_opt(3,i_T,1)*conjg(BS_T_grp(i_T_g)%dipoles_opt(3,i_T,1)))/3._SP
        endif
        res_abs=res_abs*f_eh*Z_eh*E_eh
-       if (l_BS_esort.and.i_res_ares==1) call K_IP_sort("add",i_sort,nVAR,res_abs,vtmpC)
+       if (l_BS_esort.and.i_res_ares==1) call K_IP_sort("add",i_sort,nVAR,Indexes,vtmpI,res_abs,vtmpC)
      endif
      !
      if (l_BS_kerr) then
        res_kerr=BS_T_grp(i_T_g)%dipoles_opt(1,i_T,1)*conjg(BS_T_grp(i_T_g)%dipoles_opt(2,i_T,1))
+       if(l_BS_kerr_asymm) then
+         res_kerr=(BS_T_grp(i_T_g)%dipoles_opt(1,i_T,1)*conjg(BS_T_grp(i_T_g)%dipoles_opt(2,i_T,1))-     &
+         &         BS_T_grp(i_T_g)%dipoles_opt(2,i_T,1)*conjg(BS_T_grp(i_T_g)%dipoles_opt(1,i_T,1)))/2._SP
+       endif
        res_kerr=res_kerr*Z_eh*E_eh*f_eh
-       if (l_BS_esort.and.i_res_ares==1) call K_IP_sort("add",i_sort,nVAR,res_kerr,vtmpC)
+       if (l_BS_esort.and.i_res_ares==1) call K_IP_sort("add",i_sort,nVAR,Indexes,vtmpI,res_kerr,vtmpC)
      endif
      !
      if (l_BS_photolum) then
@@ -192,16 +203,16 @@ subroutine K_IP(iq,Ken,Xk,Dip,W_bss)
      ! Dichroism
      !
      if(l_BS_dichroism) then
-       res_dich=BS_T_grp(i_T_g)%dipoles_opt(1,i_T,1)*conjg(BS_T_grp(i_T_g)%dipoles_dic(1,i_T,1))
+       res_dich=(BS_T_grp(i_T_g)%dipoles_opt(1,i_T,1)*conjg(BS_T_grp(i_T_g)%dipoles_dic(1,i_T,1))+     &
+       &         BS_T_grp(i_T_g)%dipoles_opt(2,i_T,1)*conjg(BS_T_grp(i_T_g)%dipoles_dic(2,i_T,1)))/2._SP
        if(l_BS_trace) then
-         do i_dip=2,BS_dip_size
-           ! x*L = DIP_x*DIP_orbital;
-           res_dich=res_dich+BS_T_grp(i_T_g)%dipoles_opt(i_dip,i_T,1)*conjg(BS_T_grp(i_T_g)%dipoles_dic(i_dip,i_T,1))
-         enddo
-         res_dich=res_dich/real(BS_dip_size,SP)
+         ! x*L = DIP_x*DIP_orbital;
+         res_dich=(BS_T_grp(i_T_g)%dipoles_opt(1,i_T,1)*conjg(BS_T_grp(i_T_g)%dipoles_dic(1,i_T,1))+     &
+         &         BS_T_grp(i_T_g)%dipoles_opt(2,i_T,1)*conjg(BS_T_grp(i_T_g)%dipoles_dic(2,i_T,1))+     &
+         &         BS_T_grp(i_T_g)%dipoles_opt(3,i_T,1)*conjg(BS_T_grp(i_T_g)%dipoles_dic(3,i_T,1)))/3._SP
        endif
        res_dich=res_dich*f_eh*Z_eh/(q0_def_norm)
-       if (l_BS_esort.and.i_res_ares==1) call K_IP_sort("add",i_sort,nVAR,res_dich,vtmpC)
+       if (l_BS_esort.and.i_res_ares==1) call K_IP_sort("add",i_sort,nVAR,Indexes,vtmpI,res_dich,vtmpC)
      endif
      !
      ! Magnons
@@ -213,8 +224,8 @@ subroutine K_IP(iq,Ken,Xk,Dip,W_bss)
        res_magn(:)=BS_T_grp(i_T_g)%dipoles_mag(:,i_T,1)*conjg(BS_T_grp(i_T_g)%dipoles_mag(:,i_T,1))
        res_magn=res_magn*f_eh*Z_eh
        if (l_BS_esort.and.i_res_ares==1) then
-         call K_IP_sort("add",i_sort,nVAR,res_magn(1),vtmpC)
-	 if(n_spinor==2) call K_IP_sort("add",i_sort,nVAR,res_magn(2),vtmpC)
+         call K_IP_sort("add",i_sort,nVAR,Indexes,vtmpI,res_magn(1),vtmpC)
+	 if(n_spinor==2) call K_IP_sort("add",i_sort,nVAR,Indexes,vtmpI,res_magn(2),vtmpC)
        endif
      endif
      !
@@ -305,7 +316,7 @@ subroutine K_IP(iq,Ken,Xk,Dip,W_bss)
  !
  ! Sorting procedure
  !===================
- if (l_BS_esort) call K_IP_sort("sort",0,nVAR,cZERO,vtmpC)
+ if (l_BS_esort) call K_IP_sort("sort",0,nVAR,(/0,0,0,0,0/),vtmpI,cZERO,vtmpC)
  !
  ! ALL 2 ALL
  !==============
@@ -372,7 +383,7 @@ subroutine K_IP(iq,Ken,Xk,Dip,W_bss)
  if (l_BS_magnons) X_magnons(:,:,3)=X_magnons(:,:,3)*Co_factor/(4._SP*pi)
  !
  if (     l_rpa_IP) call K_OUTPUT(iq,W_bss,"open dump IP close reset","optics kerr magnons PL dichroism")
- call K_OUTPUT(iq,W_bss,"open dump close reset bare","E_IP")
+ call K_OUTPUT(iq,W_bss,"open dump close reset bare","E_IP E_INDX_IP")
  !
  ! Clean
  ! 
diff --git a/src/bse/K_IP_sort.F b/src/bse/K_IP_sort.F
index 9c72fbd310..38a6068617 100644
--- a/src/bse/K_IP_sort.F
+++ b/src/bse/K_IP_sort.F
@@ -5,12 +5,12 @@
 !
 ! Authors (see AUTHORS file for details): DS AM
 !
-subroutine K_IP_sort(what,I,N,El,vC)
+subroutine K_IP_sort(what,I,N,Indexes,vI,El,vC)
  !
  use pars,          ONLY:SP,cZERO
  use parallel_m,    ONLY:PAR_COM_eh_INDEX,PAR_COM_Xk_ibz_INDEX
- use X_m,           ONLY:BS_E_sorted,N_BS_E_sorted
- use BS,            ONLY:BS_H_dim
+ use X_m,           ONLY:BS_E_sorted,N_BS_E_sorted,BS_E_sorted_indx
+ use BS,            ONLY:BS_H_dim,l_BS_esort_indx
  use vec_operate,   ONLY:sort,degeneration_finder
  use units,         ONLY:HA2EV
  use parallel_int,  ONLY:PP_redux_wait
@@ -18,11 +18,12 @@ subroutine K_IP_sort(what,I,N,El,vC)
 #include<memory.h>
  !
  character(*)  ::what
- integer       ::I,N
+ integer       ::I,N,Indexes(5),vI(BS_H_dim,5)
  complex(SP)   ::El,vC(BS_H_dim,N_BS_E_sorted)
  !
  ! Work space
  !
+ logical                  :: l_print_warning
  integer                  :: EDIM,Ntmp,i_VAR,i_cv,i_grp,n_deg_grp
  real(SP),    allocatable :: vR(:)
  integer,     allocatable :: sort_indx(:),first_el(:),n_of_el(:)
@@ -30,11 +31,15 @@ subroutine K_IP_sort(what,I,N,El,vC)
  if (what=="add") then
    N=N+1
    vC(I,N)=El
+   if(N==1) vI(I,:)=Indexes
    return
  endif
  !
+ !
  call PP_redux_wait(vC,COMM=PAR_COM_Xk_ibz_INDEX%COMM )
+ call PP_redux_wait(vI,COMM=PAR_COM_Xk_ibz_INDEX%COMM )
  call PP_redux_wait(vC,COMM=PAR_COM_eh_INDEX%COMM )
+ call PP_redux_wait(vI,COMM=PAR_COM_eh_INDEX%COMM )
  !
  YAMBO_ALLOC(vR,(BS_H_dim)) 
  YAMBO_ALLOC(sort_indx,(BS_H_dim))
@@ -52,21 +57,35 @@ subroutine K_IP_sort(what,I,N,El,vC)
  ! Threshold is 1meV
  call degeneration_finder(vR(:EDIM),EDIM,first_el,n_of_el,n_deg_grp,0.001_SP/HA2EV,Include_single_values=.TRUE.)
  !
+ l_print_warning=.false.
+ !
  do i_VAR=1,n_BS_E_sorted
    BS_E_sorted(:,i_VAR,1)=cZERO
    do i_grp=1,n_deg_grp
      if(i_grp>EDIM/Ntmp) cycle
+     !
+     if(i_VAR==1.and.l_BS_esort_indx) then
+       if (n_of_el(i_grp)>18) l_print_warning=.true.
+       do i_cv=first_el(i_grp),first_el(i_grp)+min(18,n_of_el(i_grp))-1
+         BS_E_sorted_indx(i_grp,i_cv-first_el(i_grp)+1,1:5)=vI(sort_indx(i_cv),:)
+       enddo
+     endif
+     !
      if(i_VAR==2) then
        BS_E_sorted(i_grp,i_VAR,1)=real(n_of_el(i_grp),SP)
        cycle
      endif
+     !
      do i_cv=first_el(i_grp),first_el(i_grp)+n_of_el(i_grp)-1
        BS_E_sorted(i_grp,i_VAR,1)=BS_E_sorted(i_grp,i_VAR,1)+vC(sort_indx(i_cv),i_VAR)
      enddo
-     BS_E_sorted(i_grp,i_VAR,1)=BS_E_sorted(i_grp,i_VAR,1)/real(n_of_el(i_grp),SP)
+     if(i_VAR==1) BS_E_sorted(i_grp,i_VAR,1)=BS_E_sorted(i_grp,i_VAR,1)/real(n_of_el(i_grp),SP)
    enddo
+   !
  enddo
  !
+ if (l_print_warning) call warning("More than 18 degenerate transitions detected. Printing only first 18")
+ !
  ! Free
  deallocate(first_el) 
  deallocate(n_of_el)
diff --git a/src/bse/K_Transitions_setup.F b/src/bse/K_Transitions_setup.F
index 8def415263..b394f0688a 100644
--- a/src/bse/K_Transitions_setup.F
+++ b/src/bse/K_Transitions_setup.F
@@ -5,18 +5,8 @@
 !
 ! Authors (see AUTHORS file for details): AM DS IMA
 !
-subroutine K_Transitions_setup(mode,iq,Ken,Xk,Dip)
+subroutine K_Transitions_setup(iq,Ken,Xk,Dip,V_bands,C_bands)
  !
- ! mode=K
- ! ======
- ! Out: 
- !
- !   BS_K_dim(1)
- !   BS_H_dim
- !   BS_nT_at_k
- !
- ! mode=T
- ! ======
  ! Filling of Transition groups
  !
  !type BS_T_group
@@ -33,106 +23,83 @@ subroutine K_Transitions_setup(mode,iq,Ken,Xk,Dip)
  use pars,         ONLY:SP
  use stderr,       ONLY:intc
  use BS_solvers,   ONLY:BSS_mode,BSS_uses_DbGd
- ! mode ="T"
  use BS,           ONLY:T_group,BS_bands,BS_T_grp,BS_nT_grps,BS_n_eh_spaces,&
 &                       BS_Blocks_and_Transitions_alloc,BS_perturbative_SOC,&
 &                       BS_nkFGbz_in_Tgrp
- ! mode ="K"
- use BS,           ONLY:BS_K_dim,BS_H_dim,BS_nT_at_k,BS_K_coupling,  &
-&                       l_BS_ares_from_res,BS_res_ares_n_mat
  use DIPOLES,      ONLY:DIPOLE_t
  use parallel_m,   ONLY:PAR_IND_WF_b_and_k,PAR_IND_T_ordered,PAR_IND_T_groups,&
 &                       PAR_IND_T_Haydock,PP_indexes_reset
- use electrons,    ONLY:levels,n_sp_pol,n_full_bands,n_met_bands
+ use electrons,    ONLY:levels,n_sp_pol
  use com,          ONLY:msg
  use R_lattice,    ONLY:qindx_X,bz_samp
  use BS,           ONLY:l_BS_magnons,l_BS_photolum
 #if defined _RT
+ use electrons,    ONLY:n_full_bands,n_met_bands
  use drivers,      ONLY:l_rt_carriers_in_use
  use RT_control,   ONLY:EQ_Transitions,EQ_NoOcc
 #endif
  !
 #include<memory.h>
  !
- character(1)   :: mode ! "K" for k-resolved, "T" for transitions groups
- type(levels)   :: Ken
- type(bz_samp)  :: Xk
- type(DIPOLE_t) :: Dip
- integer        :: iq
+ type(levels),    intent(in)   :: Ken
+ type(bz_samp),   intent(in) :: Xk
+ type(DIPOLE_t),  intent(in) :: Dip
+ integer,         intent(in) :: iq
+ integer,         intent(in) :: V_bands(2),C_bands(2)
  !
  ! Work Space
  !
  real(SP)     :: E_eh,E_eh_SOC_corr(2/n_sp_pol),f_eh,f_eh_RES,f_eh_PL
- logical      :: l_trans_in_group,l_grp_to_grp,l_skip_unoccupied
- integer      :: ikbz,ikpbz,iv,ic,ik,ikp,i_sp_v,i_sp_c,i_star,ib1,ib2,isp1,isp2,  &
+ logical      :: l_trans_in_group,l_grp_to_grp
+ integer      :: ikbz,ikpbz,iv,ic,ik,ikp,i_sp_v,i_sp_c,i_star,ib1,ib2,  &
 &                i_T_now,i_T_group_now,i_T_group_last,i_T_group_start,i_g,n_loops,i_loop,i_T_in_the_grp,i_res_ares,&
-&                BS_nT_at_k_tmp(Xk%nibz*2),V_bands(2),C_bands(2),&
 &                nColFG_in_T_group_now,i_ColFG,ikFGbz,ikFG
  !
  logical, external :: K_Transition_check
  !
- if (mode=="K") call section("+P",'Transition pre-analysis @q'//trim(intc(iq)))
- if (mode=="T") call section("=",'Transition Groups build-up @q'//trim(intc(iq)))
+ call section("=",'Transition Groups build-up @q'//trim(intc(iq)))
  !
- call k_build_up_BZ_tables(Xk)
+ ! BS_nT_grps is computed in PARALLEL_Transition_grouping
  !
- ! BS_nT_grps is first computed in PARALLEL_Transition_grouping
+ call msg('r','[BSK] Transition groups (total)',BS_nT_grps)
  !
- if (mode=="T") then
-   allocate(BS_T_grp(BS_nT_grps))
-   do i_g=1,BS_nT_grps
-     BS_T_grp(i_g)%size=0
-   enddo
-   n_loops=2
-   !
-   allocate(BS_nkFGbz_in_Tgrp(BS_nT_grps))
-   !
-   call PP_indexes_reset(PAR_IND_WF_b_and_k)
-   YAMBO_ALLOC(PAR_IND_WF_b_and_k%element_2D,(BS_bands(2),Xk%nibz))
-   PAR_IND_WF_b_and_k%element_2D=.FALSE.
- else
-   BS_K_dim=0
-   BS_nT_at_k_tmp=0
-   n_loops=1
- endif
+ allocate(BS_T_grp(BS_nT_grps))
+ do i_g=1,BS_nT_grps
+   BS_T_grp(i_g)%size=0
+ enddo
+ n_loops=2
  !
- l_skip_unoccupied=.true.
- V_bands=(/BS_bands(1),maxval(Ken%nbm)/)
- C_bands=(/minval(Ken%nbf)+1,BS_bands(2)/)
-#if defined _RT
- ! Bands range. In the _RT case %nbf and %nbm turn metallic when carriers are used.
- if (l_rt_carriers_in_use.and.EQ_Transitions) then
-   l_skip_unoccupied=.false.
-   V_bands=(/BS_bands(1),maxval(n_met_bands)/)
-   C_bands=(/minval(n_full_bands)+1,BS_bands(2)/)
- endif
- if (EQ_NoOcc) then
-   l_skip_unoccupied=.false.
-   V_bands=BS_bands
-   C_bands=BS_bands
- endif
-#endif
+ allocate(BS_nkFGbz_in_Tgrp(BS_nT_grps))
+ !
+ call PP_indexes_reset(PAR_IND_WF_b_and_k)
+ YAMBO_ALLOC(PAR_IND_WF_b_and_k%element_2D,(BS_bands(2),Xk%nibz))
+ PAR_IND_WF_b_and_k%element_2D=.FALSE.
  !
  do i_loop=1,n_loops
    !
-   if (mode=="T") then
-     i_T_now=0
-     i_T_group_now=0
-     i_T_group_last=0
-   endif
+   i_T_now=0
+   i_T_group_now=0
+   i_T_group_last=0
+   !
+   nColFG_in_T_group_now=1
    !
    ! DS: WARNING
    ! I cannot revert ik and i_sp_pol loop here ...
    !
    do i_res_ares=1,BS_n_eh_spaces
      !
-     if (mode=="T") i_T_group_start=i_T_group_now+1
+     i_T_group_start=i_T_group_now+1
+     !
+     ! Here I cannot invert the ik and the i_sp loop since 
+     ! BS_T_grp and BS_nT_at_k are defined according to kpts while they
+     ! should be defined according to both i_sp_pol and kpts
      !
      do ik=1,Xk%nibz
        !
-       ! Here I cannot invert the ik and the i_sp loop since 
-       ! BS_T_grp and BS_nT_at_k are defined according to kpts while they
-       ! should be defined according to both i_sp_pol and kpts
+       if (BSS_uses_DbGd .and. (index(BSS_mode,'h')/=0)) then
+         nColFG_in_T_group_now = maxval(Xk%FGbz%N_mapped(&
+&                                Xk%k_table(ik,Xk%star(ik,:Xk%nstar(ik)))))
+       end if
        !
        ! Moreover I cannot move iv,ic loops insiede i_sp_pol and i_star loops
        ! Otherwise runs with parallelization over eh fail
@@ -140,13 +107,6 @@ subroutine K_Transitions_setup(mode,iq,Ken,Xk,Dip)
        ! of the algorithm with the qindx_B distributed in memory very slow
        ! since i_k_bz and i_p_bz change at each iteration of the loop
        !
-       if (BSS_uses_DbGd .and. (index(BSS_mode,'h')/=0)) then
-         nColFG_in_T_group_now = maxval(Xk%FGbz%N_mapped(&
-&                                Xk%k_table(ik,Xk%star(ik,:Xk%nstar(ik)))))
-       else
-         nColFG_in_T_group_now=1
-       end if
-       !
        do iv=V_bands(1),V_bands(2)
          do ic=C_bands(1),C_bands(2)
            !
@@ -154,16 +114,10 @@ subroutine K_Transitions_setup(mode,iq,Ken,Xk,Dip)
              i_sp_v=i_sp_c
              !
              if (l_BS_magnons.and.n_sp_pol==2) then
-               if(i_res_ares==1.and.i_sp_c==1) cycle
-               if(i_res_ares==2.and.i_sp_c==2) cycle
+               if(i_sp_c==1) cycle
                i_sp_v=mod(i_sp_c,n_sp_pol)+1
              endif
              !
-             if( l_skip_unoccupied) then
-                if( iv>Ken%nbm(i_sp_v) ) cycle
-                if( ic<Ken%nbf(i_sp_c) ) cycle
-             endif
-             !
              do i_star=1,Xk%nstar(ik)
                !
                ikbz=Xk%k_table(ik,Xk%star(ik,i_star))
@@ -171,25 +125,11 @@ subroutine K_Transitions_setup(mode,iq,Ken,Xk,Dip)
                ikpbz=qindx_X(iq,ikbz,1)
                ikp  =Xk%sstar(ikpbz,1)
                !
-               ib1=iv 
-               ib2=ic 
-               isp1=i_sp_v 
-               isp2=i_sp_c
-               if (i_res_ares==2) then
-                 ib1=ic 
-                 ib2=iv 
-                 isp1=i_sp_c 
-                 isp2=i_sp_v
-               endif
+               if (i_res_ares==1) then; ib1=iv; ib2=ic; endif
+               if (i_res_ares==2) then; ib1=ic; ib2=iv; endif
                !
-               if (.not.K_Transition_check(Ken,Dip,iq,ik,ikp,ib1,ib2,isp2,i_res_ares,&
+               if (.not.K_Transition_check(Ken,Dip,iq,ik,ikp,ib1,ib2,i_sp_c,i_res_ares,&
 &                                          E_eh,E_eh_SOC_corr,f_eh,f_eh_RES,f_eh_PL)) cycle
-               !
-               if (mode=="K") then
-                 BS_K_dim(i_res_ares)=BS_K_dim(i_res_ares)+1
-                 BS_nT_at_k_tmp(ik+(i_res_ares-1)*Xk%nibz)=BS_nT_at_k_tmp(ik+(i_res_ares-1)*Xk%nibz)+1
-                 cycle
-               endif
                !
                i_T_now=i_T_now+1
                i_T_group_now=T_group(i_T_now)
@@ -227,7 +167,7 @@ subroutine K_Transitions_setup(mode,iq,Ken,Xk,Dip)
                !
                i_T_in_the_grp=i_T_in_the_grp+1
                !
-               BS_T_grp(i_T_group_now)%table(i_T_in_the_grp,:)=(/ikbz,ib1,ib2,isp2,isp1/)
+               BS_T_grp(i_T_group_now)%table(i_T_in_the_grp,:)=(/ikbz,ib1,ib2,i_sp_c,i_sp_v/)
                !
                PAR_IND_WF_b_and_k%element_2D(ib2,ik )=.TRUE.
                PAR_IND_WF_b_and_k%element_2D(ib1,ikp)=.TRUE.
@@ -258,13 +198,13 @@ subroutine K_Transitions_setup(mode,iq,Ken,Xk,Dip)
                BS_T_grp(i_T_group_now)%f_RES(i_T_in_the_grp)=f_eh_RES
                !
                if (allocated(Ken%Z)) BS_T_grp(i_T_group_now)%Z(i_T_in_the_grp)=&
-&                                                            Ken%Z(ib2,ik,isp2)*Ken%Z(ib1,ikp,isp1)
+               &                      Ken%Z(ib2,ik,i_sp_c)*Ken%Z(ib1,ikp,i_sp_v)
                !
                ! Note that W is negative as the linewidth is negative(positive) above(below) the Fermi level.
                ! Presently there is the abs value here and a minus in K_IP.F
                !
                if (allocated(Ken%W)) BS_T_grp(i_T_group_now)%W(i_T_in_the_grp)= &
-&                                             (-1._SP)**(i_res_ares-1)*(abs(Ken%W(ib2,ik,isp2))+abs(Ken%W(ib1,ikp,isp1)))
+               &  (-1._SP)**(i_res_ares-1)*(abs(Ken%W(ib2,ik,i_sp_c))+abs(Ken%W(ib1,ikp,i_sp_v)))
                !
                if ( BS_perturbative_SOC ) BS_T_grp(i_T_group_now)%E_SOC_corr(:,i_T_in_the_grp)=E_eh_SOC_corr(:)
                !
@@ -279,12 +219,109 @@ subroutine K_Transitions_setup(mode,iq,Ken,Xk,Dip)
    !
  enddo ! loop
  !
- ! REPORT
+end subroutine K_Transitions_setup
+!
+!
+subroutine K_Transitions_dimensions(iq,Ken,Xk,Dip,V_bands,C_bands)
+ !
+ ! Out: 
+ !
+ !   BS_K_dim(1)
+ !   BS_H_dim
+ !   BS_nT_at_k
+ !
+ use pars,         ONLY:SP
+ use stderr,       ONLY:intc
+ use BS,           ONLY:BS_K_dim,BS_H_dim,BS_nT_at_k,BS_K_coupling,   &
+&                       l_BS_ares_from_res,BS_res_ares_n_mat,BS_bands,&
+&                       BS_n_eh_spaces
+ use DIPOLES,      ONLY:DIPOLE_t
+ use electrons,    ONLY:levels,n_sp_pol
+ use com,          ONLY:msg
+ use R_lattice,    ONLY:qindx_X,bz_samp
+ use BS,           ONLY:l_BS_magnons
+#if defined _RT
+ use electrons,    ONLY:n_full_bands,n_met_bands
+ use drivers,      ONLY:l_rt_carriers_in_use
+ use RT_control,   ONLY:EQ_Transitions,EQ_NoOcc
+#endif
+ !
+#include<memory.h>
+ !
+ type(levels),    intent(in) :: Ken
+ type(bz_samp),   intent(in) :: Xk
+ type(DIPOLE_t),  intent(in) :: Dip
+ integer,         intent(in) :: iq
+ integer,         intent(out):: V_bands(2),C_bands(2)
+ !
+ ! Work Space
+ !
+ real(SP)     :: E_eh,E_eh_SOC_corr(2/n_sp_pol),f_eh,f_eh_RES,f_eh_PL
+ integer      :: ikbz,ikpbz,iv,ic,ik,ikp,i_sp_v,i_sp_c,i_star,ib1,ib2,  &
+&                i_res_ares,BS_nT_at_k_tmp(Xk%nibz*2)
+ !
+ logical, external :: K_Transition_check
+ !
+ !
+ call section("+P",'Transition pre-analysis @q'//trim(intc(iq)))
+ !
+ call k_build_up_BZ_tables(Xk)
+ !  
+ BS_K_dim=0
+ BS_nT_at_k_tmp=0
  !
- if (mode=="T") then
-   call msg('r','[BSK] Transition groups (total)',BS_nT_grps)
-   return
+ V_bands=(/BS_bands(1),maxval(Ken%nbm)/)
+ C_bands=(/minval(Ken%nbf)+1,BS_bands(2)/)
+#if defined _RT
+ ! Bands range. In the _RT case %nbf and %nbm turn metallic when carriers are used.
+ if (l_rt_carriers_in_use.and.EQ_Transitions) then
+   V_bands=(/BS_bands(1),maxval(n_met_bands)/)
+   C_bands=(/minval(n_full_bands)+1,BS_bands(2)/)
+ endif
+ if (EQ_NoOcc) then
+   V_bands=BS_bands
+   C_bands=BS_bands
  endif
+#endif
+ !
+ do i_res_ares=1,BS_n_eh_spaces
+   !
+   do i_sp_c=1,n_sp_pol
+     !
+     i_sp_v=i_sp_c
+     !
+     if (l_BS_magnons.and.n_sp_pol==2) then
+       if(i_sp_c==1) cycle
+       i_sp_v=mod(i_sp_c,n_sp_pol)+1
+     endif
+     !
+     do ik=1,Xk%nibz
+       do i_star=1,Xk%nstar(ik)
+         !
+         ikbz=Xk%k_table(ik,Xk%star(ik,i_star))
+         ikpbz=qindx_X(iq,ikbz,1)
+         ikp  =Xk%sstar(ikpbz,1)
+         !
+         do iv=V_bands(1),V_bands(2)
+           do ic=C_bands(1),C_bands(2)
+             !
+             if (i_res_ares==1) then; ib1=iv; ib2=ic; endif
+             if (i_res_ares==2) then; ib1=ic; ib2=iv; endif
+             !
+             if (.not.K_Transition_check(Ken,Dip,iq,ik,ikp,ib1,ib2,i_sp_c,i_res_ares,&
+             &                            E_eh,E_eh_SOC_corr,f_eh,f_eh_RES,f_eh_PL)) cycle
+             !
+             BS_K_dim(i_res_ares)=BS_K_dim(i_res_ares)+1
+             BS_nT_at_k_tmp(ik+(i_res_ares-1)*Xk%nibz)=BS_nT_at_k_tmp(ik+(i_res_ares-1)*Xk%nibz)+1
+             !
+           enddo ! conduction 
+         enddo ! valence
+         !
+       enddo ! symmetries
+     enddo ! k (ibz)
+   enddo ! spin 
+   !
+ enddo ! i_res_ares
  !
  YAMBO_ALLOC(BS_nT_at_k,(Xk%nibz*BS_n_eh_spaces))
  BS_nT_at_k=BS_nT_at_k_tmp(:Xk%nibz*BS_n_eh_spaces)
@@ -299,4 +336,4 @@ subroutine K_Transitions_setup(mode,iq,Ken,Xk,Dip)
    if (BS_res_ares_n_mat==2.or.BS_K_coupling) BS_H_dim=BS_K_dim(1)+BS_K_dim(2)
  endif
  !
-end subroutine
+end subroutine K_Transitions_dimensions
diff --git a/src/bse/K_components_folded_in_serial_arrays.F b/src/bse/K_components_folded_in_serial_arrays.F
index 0e2ab8c321..47cd16f8c4 100644
--- a/src/bse/K_components_folded_in_serial_arrays.F
+++ b/src/bse/K_components_folded_in_serial_arrays.F
@@ -50,7 +50,7 @@ subroutine K_components_folded_in_serial_arrays(iq)
  call BSS_DICH_alloc( )
  call BSS_PL_alloc( )
  !
- if (BS_nT_grps>0) call live_timing("Folding BSE Kernel",PAR_IND_T_groups%n_of_elements(PAR_COM_eh_INDEX%CPU_id+1))
+ if (BS_nT_grps>0) call live_timing("Unfolding BSE components",PAR_IND_T_groups%n_of_elements(PAR_COM_eh_INDEX%CPU_id+1))
  !
  BS_n_eh_spaces_solver=1
  if(BS_n_eh_spaces==1.and.BS_K_coupling) BS_n_eh_spaces_solver=2
@@ -106,7 +106,7 @@ subroutine K_components_folded_in_serial_arrays(iq)
        !
        if (l_BS_magnons) BSS_dipoles_magn(:,H_pos)=BS_T_grp(i_g)%dipoles_mag(:,i_Tr,i_res_ares_solver)
        !
-       if (l_BS_dichroism) BSS_dipoles_dich(:,H_pos)=BS_T_grp(i_g)%dipoles_dic(:,i_c,i_res_ares_solver)
+       if (l_BS_dichroism) BSS_dipoles_dich(:,H_pos)=BS_T_grp(i_g)%dipoles_dic(:,i_Tr,i_res_ares_solver)
        !
        if (l_BS_photolum) BSS_dipoles_PL(:,H_pos)=BS_T_grp(i_g)%dipoles_opt(:,i_Tr,i_res_ares_solver)*PL_weights(:)
        !
diff --git a/src/bse/K_correlation_collisions_dir.F b/src/bse/K_correlation_collisions_dir.F
index 2024ae7980..d004339810 100644
--- a/src/bse/K_correlation_collisions_dir.F
+++ b/src/bse/K_correlation_collisions_dir.F
@@ -14,7 +14,7 @@ subroutine K_correlation_collisions_dir(iq,i_block,qindx_ID_frag,Xk,q)
  use D_lattice,     ONLY:nsym
  use R_lattice,     ONLY:qindx_X,bz_samp
  use BS,            ONLY:O_ng,BS_bands,BS_res_K_corr,BS_blk,&
-&                        BS_oscillators_alloc,K_CORR_collision,BS_T_grp,&
+&                        K_CORR_collision,BS_T_grp,&
 &                        BS_K_dim,l_BS_ares_from_res
  use electrons,     ONLY:n_sp_pol
  use collision_el,  ONLY:elemental_collision_free,elemental_collision_alloc
@@ -131,11 +131,11 @@ subroutine K_correlation_collisions_dir(iq,i_block,qindx_ID_frag,Xk,q)
        !
        ! v->v', i.e. k-q -> p-q=k-q_W-q transitions
        !
-       call EVAL_table(i_block,i_kmq_t,i_pmq_t,i_v_k,i_v_p,i_k_sp_pol_v,i_kmq_s,i_pmq_s)
+       call SET_table(i_block,i_kmq_t,i_pmq_t,i_v_k,i_v_p,i_k_sp_pol_v,i_kmq_s,i_pmq_s)
        !
        ! c->c', i.e. k -> p=k-q_w transitions
        !
-       call EVAL_table(i_block,1,1,i_c_k,i_c_p,i_k_sp_pol_c,i_k_s,i_p_s)
+       call SET_table(i_block,1,1,i_c_k,i_c_p,i_k_sp_pol_c,i_k_s,i_p_s)
        !
      enddo
    enddo
@@ -164,18 +164,18 @@ subroutine K_correlation_collisions_dir(iq,i_block,qindx_ID_frag,Xk,q)
        !
        ! v->c', i.e. k-q -> p-q=k-q_W-q transitions
        !
-       call EVAL_table(i_block,i_kmq_t,i_pmq_t,i_v_k,i_c_p,i_k_sp_pol_v,i_kmq_s,i_pmq_s)
+       call SET_table(i_block,i_kmq_t,i_pmq_t,i_v_k,i_c_p,i_k_sp_pol_v,i_kmq_s,i_pmq_s)
        !
        ! c->v', i.e. k -> p=k-q_W transitions
        !
-       call EVAL_table(i_block,1,1,i_c_k,i_v_p,i_k_sp_pol_c,i_k_s,i_p_s)
+       call SET_table(i_block,1,1,i_c_k,i_v_p,i_k_sp_pol_c,i_k_s,i_p_s)
        !
      enddo
    enddo
    !
  end select
  !
- call BS_oscillators_alloc(.FALSE.,0,.TRUE.,i_block)
+ call BS_correlation_oscillators_alloc(i_block)
  !
 #if !defined _CUDA
  !$omp parallel default(shared), private(i_Tk,i_Tp, &
@@ -375,12 +375,11 @@ subroutine fill_indexes(j_Tk,j_Tp,j_Tgrp_k,j_Tgrp_p,j_block,j_q,            &
        !
    end subroutine fill_indexes
    !
-   subroutine EVAL_table(iB,iki,iko,ibi,ibo,i_sp_pol,i_s_ki,i_s_ko)
+   subroutine SET_table(iB,iki,iko,ibi,ibo,i_sp_pol,i_s_ki,i_s_ko)
      implicit none
      integer, intent(in) :: iB,iki,iko,ibi,ibo,i_sp_pol,i_s_ki,i_s_ko
      !
      if( O_todo_table(i_s_ki,i_s_ko,iki,iko,ibi,ibo,i_sp_pol) ) return
-     !
      BS_blk(iB)%N_oscillators=BS_blk(iB)%N_oscillators+1 
      BS_blk(iB)%O_table(i_s_ki,i_s_ko,iki,iko,ibi,ibo,i_sp_pol)=BS_blk(iB)%N_oscillators
      O_todo_table(i_s_ki,i_s_ko,iki,iko,ibi,ibo,i_sp_pol)=.true.
diff --git a/src/bse/K_correlation_collisions_std.F b/src/bse/K_correlation_collisions_std.F
index 3c7fefcc5d..62a0d0eb28 100644
--- a/src/bse/K_correlation_collisions_std.F
+++ b/src/bse/K_correlation_collisions_std.F
@@ -36,7 +36,7 @@ subroutine K_correlation_collisions_std(iq,i_block,qindx_ID_frag,Xk,q)
  use D_lattice,     ONLY:nsym
  use R_lattice,     ONLY:qindx_X,bz_samp
  use BS,            ONLY:O_ng,BS_bands,BS_res_K_corr,BS_blk,&
-&                        BS_oscillators_alloc,K_CORR_collision,BS_T_grp,&
+&                        K_CORR_collision,BS_T_grp,&
 &                        BS_K_dim,l_BS_ares_from_res
  use electrons,     ONLY:n_sp_pol
  use collision_el,  ONLY:elemental_collision_free,elemental_collision_alloc
@@ -66,7 +66,7 @@ subroutine K_correlation_collisions_std(iq,i_block,qindx_ID_frag,Xk,q)
  !
  logical, allocatable :: O_todo_table(:,:,:,:,:,:)
  !
- if (.not.BS_res_K_corr) return 
+ if (.not.BS_res_K_corr) return
  !
  call timing('T_space CORR Osc.',OPR='start')
  !
@@ -201,7 +201,7 @@ subroutine K_correlation_collisions_std(iq,i_block,qindx_ID_frag,Xk,q)
    !
  end select
  !
- call BS_oscillators_alloc(.FALSE.,0,.TRUE.,i_block)
+ call BS_correlation_oscillators_alloc(i_block)
  !
 #if !defined _CUDA
  !$omp parallel default(shared), private(i_Tk,i_Tp, &
diff --git a/src/bse/K_correlation_kernel_dir.F b/src/bse/K_correlation_kernel_dir.F
index 6e974bfc2d..ff6f1e8ce0 100644
--- a/src/bse/K_correlation_kernel_dir.F
+++ b/src/bse/K_correlation_kernel_dir.F
@@ -40,8 +40,8 @@ function K_correlation_kernel_dir(i_block,i_p,i_pmq,
    return
  endif
  !
- iO1       = BS_blk(i_block)%O_table(i_k_s,i_p_s,1,1,i_n_k,i_n_p,i_k_sp_pol_n)
- iO2       = BS_blk(i_block)%O_table(i_kmq_s,i_pmq_s,i_kmq_t,i_pmq_t,i_m_k,i_m_p,i_k_sp_pol_m)
+ iO1 = BS_blk(i_block)%O_table(i_k_s,i_p_s,1,1,i_n_k,i_n_p,i_k_sp_pol_n)
+ iO2 = BS_blk(i_block)%O_table(i_kmq_s,i_pmq_s,i_kmq_t,i_pmq_t,i_m_k,i_m_p,i_k_sp_pol_m)
  !
 #ifdef _CUDA
  !
diff --git a/src/bse/K_diago_driver.F b/src/bse/K_diago_driver.F
index 08cbac35e5..a4e43858da 100644
--- a/src/bse/K_diago_driver.F
+++ b/src/bse/K_diago_driver.F
@@ -24,16 +24,17 @@ subroutine K_diago_driver(iq,W,X_static)
  use pars,           ONLY:SP
  use stderr,         ONLY:intc
  use BS,             ONLY:BS_K_coupling,BS_H_dim,BS_K_dim,BS_perturbative_SOC,&
-&                         BS_res_ares_n_mat,l_BS_optics !,BS_Blocks_symmetrize_K,&
+&                         BS_res_ares_n_mat,l_BS_optics,l_BS_dichroism !,BS_Blocks_symmetrize_K,&
  use BS_solvers,     ONLY:io_BSS_diago,BSS_perturbative_width,BSS_mode,&
 &                         BSS_write_eig_2_db,BSS_eh_W,BS_mat,BSS_n_eig,BS_E
  use X_m,            ONLY:X_t,X_OUTPUT_driver
- use electrons,      ONLY:n_sp_pol
+ use electrons,      ONLY:n_sp_pol,n_spinor
  use frequency,      ONLY:w_samp
  use com,            ONLY:isec
  use LIVE_t,         ONLY:live_timing
  use linear_algebra, ONLY:DIAGO
- use interfaces,     ONLY:K_diago_response_functions,LINEAR_ALGEBRA_driver,PL_diago_residual
+ use interfaces,     ONLY:K_diago_response_functions,LINEAR_ALGEBRA_driver,&
+ &                        K_diago_left_residuals,K_diago_right_residuals,PL_diago_residual
  use IO_int,         ONLY:io_control
  use IO_m,           ONLY:REP,VERIFY,OP_WR_CL,OP_RD_CL
  use parser_m,       ONLY:parser
@@ -60,7 +61,7 @@ subroutine K_diago_driver(iq,W,X_static)
  ! Work Space
  !
  integer       :: i_BS_mat,Input_n_eig
- logical       :: K_is_not_hermitian,l_diago,l_slepc,l_target_energy
+ logical       :: K_is_not_hermitian,l_diago,l_slepc,l_target_energy,l_dip_more
  character(1)  :: sec
  character(5)  :: Solver
  !
@@ -84,12 +85,14 @@ subroutine K_diago_driver(iq,W,X_static)
  !
  ! Residuals, different for each response function
  !=================================================
- complex(SP),pointer     :: BS_R_left(:)         => null()
- complex(SP),pointer     :: BS_R_right(:)        => null()
+ complex(SP),pointer     :: BS_R_left_abs(:)     => null()
+ complex(SP),pointer     :: BS_R_right_abs(:)    => null()
  !
  real(SP),   pointer     :: BS_R_PL(:,:)         => null()
  !
+ complex(SP),pointer     :: BS_R_left_kerr(:)    => null()
  complex(SP),pointer     :: BS_R_right_kerr(:)   => null()
+ complex(SP),pointer     :: BS_R_right_dich(:,:) => null()
  !
  complex(SP),pointer     :: BS_R_left_magn(:,:)  => null()
  complex(SP),pointer     :: BS_R_right_magn(:,:) => null()
@@ -102,6 +105,8 @@ subroutine K_diago_driver(iq,W,X_static)
 #if defined _SLEPC && !defined _NL
  l_slepc     = index(BSS_mode,'s')/=0
 #endif
+ !
+ l_dip_more=l_BS_kerr.or.l_BS_dichroism
  !
  if(l_diago) Solver="Diago"
 #if defined _SLEPC && !defined _NL
@@ -145,8 +150,8 @@ subroutine K_diago_driver(iq,W,X_static)
    ! Diagonalization DB (IN)
    !========================
    call io_control(ACTION=OP_RD_CL,COM=REP,MODE=VERIFY,SEC=(/1,2/),ID=ID)
-   io_err=io_BSS_diago(iq,i_BS_mat,ID,X_static,BS_E,BS_R_left,BS_R_right,BS_E_SOC_corr,&
-&                      BS_R_left_magn,BS_R_right_magn,BS_R_right_kerr,BS_R_PL)
+   io_err=io_BSS_diago(iq,i_BS_mat,ID,X_static,BS_E,BS_R_left_abs,BS_R_right_abs,BS_E_SOC_corr,&
+  &                    BS_R_left_magn,BS_R_right_magn,BS_R_left_kerr,BS_R_right_kerr,BS_R_right_dich,BS_R_PL)
    !
    K_is_not_hermitian=BS_K_coupling.or.(allocated(BSS_eh_W).and..not.BSS_perturbative_width)
    !
@@ -230,20 +235,19 @@ subroutine K_diago_driver(iq,W,X_static)
        if(l_slepc) call MATRIX_slepc(slepc_mat,l_target_energy,BSS_n_eig,BS_mat,E_real=BS_E_real)
 #endif
        !
-       BS_E=cmplx(BS_E_real,0._SP,SP)
+       BS_E=cmplx(BS_E_real,0._SP,kind=SP)
        !
      endif
      !
 #if defined _SLEPC && !defined _NL
      !
-     ! BSS_n_eig could be lowered by MATRIX_slepc
-     if (Input_n_eig>BSS_n_eig) then
-       call K_slepc_resize_variables()
+     if (l_slepc) then
+       ! BSS_n_eig could be lowered by MATRIX_slepc
+       if (Input_n_eig>BSS_n_eig) call K_slepc_resize_variables()
+       ! Destroy the matrix
+       call MatDestroy(slepc_mat,ierr)
      endif
      !
-     ! Destroy the matrix
-     !============================
-     if(l_slepc) call MatDestroy(slepc_mat,ierr)
 #endif
      !
      if(l_diago) call live_timing( )
@@ -251,20 +255,59 @@ subroutine K_diago_driver(iq,W,X_static)
      ! Construct the residuals of epsilon
      !====================================
      if (K_is_not_hermitian) then
+       !
+       ! Right residuals
+       if(l_BS_optics.or.l_BS_kerr.or.l_BS_dichroism) then
+                        call K_diago_right_residuals("opt1",i_BS_mat,BS_E,BS_R_right_abs, BS_V_right)
+         if(l_dip_more) call K_diago_right_residuals("opt2",i_BS_mat,BS_E,BS_R_right_kerr,BS_V_right)
+       endif
+       if(l_BS_dichroism) then
+         call K_diago_right_residuals("dic1",i_BS_mat,BS_E,BS_R_right_dich(:,1),BS_V_right)
+         call K_diago_right_residuals("dic2",i_BS_mat,BS_E,BS_R_right_dich(:,2),BS_V_right)
+       endif
+       if(l_BS_magnons)   then
+                          call K_diago_right_residuals("mag1",i_BS_mat,BS_E,BS_R_right_magn(1,:),BS_V_right)
+         if(n_spinor==2)  call K_diago_right_residuals("mag2",i_BS_mat,BS_E,BS_R_right_magn(2,:),BS_V_right)
+       endif
        !
        call K_diago_overlap_matrix(BS_V_left,BS_V_right,BS_overlap)
        !
-       if (l_BS_optics ) call K_diago_non_hermitian_residuals(BS_E,BS_R_left,BS_R_right,BS_V_left,BS_V_right,BS_overlap)
-       if (l_BS_kerr   ) call K_diago_kerr_residual(BS_E,BS_V_right,BS_R_right_kerr)
-       if (l_BS_magnons) call K_diago_non_herm_magnons_residual(BS_R_left_magn,BS_R_right_magn,BS_V_left,BS_V_right,BS_overlap)
-       if (l_BS_photolum)call PL_diago_residual(BS_V_left,BS_V_right,BS_R_PL,K_is_not_hermitian,BS_overlap)
+       ! Left residuals
+       if(l_BS_optics.or.l_BS_kerr.or.l_BS_dichroism) then
+                        call K_diago_left_residuals("opt1",i_BS_mat,BS_E,BS_R_left_abs, BS_V_left,BS_overlap)
+         if(l_dip_more) call K_diago_left_residuals("opt2",i_BS_mat,BS_E,BS_R_left_kerr,BS_V_left,BS_overlap)
+       endif
+       if(l_BS_magnons) then
+                         call K_diago_left_residuals("mag1",i_BS_mat,BS_E,BS_R_left_magn(1,:),BS_V_left,BS_overlap)
+         if(n_spinor==2) call K_diago_left_residuals("mag2",i_BS_mat,BS_E,BS_R_left_magn(2,:),BS_V_left,BS_overlap)
+       endif
+       !
+       ! DS: this also should be changed according to the general structure
+       if(l_BS_photolum) call PL_diago_residual(BS_V_left,BS_V_right,BS_R_PL,K_is_not_hermitian,BS_overlap)
        !
      else
        !
-       if (l_BS_optics)  call K_diago_hermitian_residuals(i_BS_mat,BS_E_real,BS_R_left,BS_R_right)
-       if (l_BS_kerr   ) call K_diago_kerr_residual(BS_E,BS_mat,BS_R_right_kerr)
-       if (l_BS_magnons) call K_diago_herm_magnons_residual(i_BS_mat,BS_R_left_magn,BS_R_right_magn)
-       if (l_BS_photolum)call PL_diago_residual(BS_mat,BS_mat,BS_R_PL,K_is_not_hermitian)
+       ! Right residuals
+       if(l_BS_optics.or.l_BS_kerr.or.l_BS_dichroism) then
+                        call K_diago_right_residuals("opt1", i_BS_mat,BS_E,BS_R_right_abs, BS_mat)
+         if(l_dip_more) call K_diago_right_residuals("opt2",i_BS_mat,BS_E,BS_R_right_kerr,BS_mat)
+       endif
+       if(l_BS_dichroism) then
+         call K_diago_right_residuals("dic1",i_BS_mat,BS_E,BS_R_right_dich(:,1),BS_mat)
+         call K_diago_right_residuals("dic2",i_BS_mat,BS_E,BS_R_right_dich(:,2),BS_mat)
+       endif
+       if(l_BS_magnons) then
+                          call K_diago_right_residuals("mag1",i_BS_mat,BS_E,BS_R_right_magn(1,:),BS_mat)
+         if(n_spinor==2)  call K_diago_right_residuals("mag2",i_BS_mat,BS_E,BS_R_right_magn(2,:),BS_mat)
+       endif
+       !
+       ! Left residuals
+       if(l_BS_optics.or.l_BS_kerr.or.l_BS_dichroism)  BS_R_left_abs =conjg(BS_R_right_abs)
+       if(               l_BS_kerr.or.l_BS_dichroism)  BS_R_left_kerr=conjg(BS_R_right_kerr)
+       if(l_BS_magnons)                                BS_R_left_magn=conjg(BS_R_right_magn)
+       !
+       ! DS: this also should be changed according to the general structure
+       if(l_BS_photolum) call PL_diago_residual(BS_mat,BS_mat,BS_R_PL,K_is_not_hermitian)
        !
      endif
      !
@@ -289,18 +332,18 @@ subroutine K_diago_driver(iq,W,X_static)
      !
    endif
    !
-   ! Now I calculate epsilon
-   !=========================
-   call K_diago_response_functions(iq,i_BS_mat,W,BS_E,BS_R_left,BS_R_right,BS_E_SOC_corr,&
-   &                                  BS_R_left_magn,BS_R_right_magn,BS_R_right_kerr,BS_R_PL)
+   ! Now I calculate the physical quantities
+   !=========================================
+   call K_diago_response_functions(iq,i_BS_mat,W,BS_E,BS_R_left_abs,BS_R_right_abs,BS_E_SOC_corr,&
+   &               BS_R_left_magn,BS_R_right_magn,BS_R_left_kerr,BS_R_right_kerr,BS_R_right_dich,BS_R_PL)
    !
    ! Diagonalization DB (OUT)
    !==========================
    if (io_err/=0) then
      !
      call io_control(ACTION=OP_WR_CL,COM=REP,MODE=VERIFY,SEC=(/1,2,3/),ID=ID)
-     io_err=io_BSS_diago(iq,i_BS_mat,ID,X_static,BS_E,BS_R_left,BS_R_right,BS_E_SOC_corr,&
-     &                                  BS_R_left_magn,BS_R_right_magn,BS_R_right_kerr,BS_R_PL)
+     io_err=io_BSS_diago(iq,i_BS_mat,ID,X_static,BS_E,BS_R_left_abs,BS_R_right_abs,BS_E_SOC_corr,&
+     &             BS_R_left_magn,BS_R_right_magn,BS_R_left_kerr,BS_R_right_kerr,BS_R_right_dich,BS_R_PL)
      !
    endif
    !
@@ -338,12 +381,18 @@ subroutine local_alloc(what)
          YAMBO_ALLOC_P(BS_E_SOC_corr,(2/n_sp_pol,BSS_n_eig))
        endif
        if (l_BS_optics) then
-         YAMBO_ALLOC_P(BS_R_left,(BSS_n_eig))
-         YAMBO_ALLOC_P(BS_R_right,(BSS_n_eig))
+         YAMBO_ALLOC_P(BS_R_left_abs,(BSS_n_eig))
+         YAMBO_ALLOC_P(BS_R_right_abs,(BSS_n_eig))
        endif
        if (l_BS_kerr) then
          YAMBO_ALLOC_P(BS_R_right_kerr,(BSS_n_eig))
        endif
+       if (l_dip_more) then
+         YAMBO_ALLOC_P(BS_R_left_kerr,(BSS_n_eig))
+       endif
+       if (l_BS_dichroism) then
+         YAMBO_ALLOC_P(BS_R_right_dich,(BSS_n_eig,2))
+       endif
        if (l_BS_magnons) then
          YAMBO_ALLOC_P(BS_R_left_magn,(2,BSS_n_eig))
          YAMBO_ALLOC_P(BS_R_right_magn,(2,BSS_n_eig))
@@ -386,9 +435,11 @@ subroutine local_free()
      YAMBO_FREE(BS_V_left)
      YAMBO_FREE_P(BS_overlap)
      YAMBO_FREE_P(BS_E_SOC_corr)
-     YAMBO_FREE_P(BS_R_right)
-     YAMBO_FREE_P(BS_R_left)
+     YAMBO_FREE_P(BS_R_right_abs)
+     YAMBO_FREE_P(BS_R_left_abs)
      YAMBO_FREE_P(BS_R_right_kerr)
+     YAMBO_FREE_P(BS_R_left_kerr)
+     YAMBO_FREE_P(BS_R_right_dich)
      YAMBO_FREE_P(BS_R_right_magn)
      YAMBO_FREE_P(BS_R_left_magn)
      YAMBO_FREE_P(BS_R_PL)
@@ -411,10 +462,12 @@ subroutine K_slepc_resize_variables()
      if(allocated(BS_corrections)) call BSS_resize(BS_corrections)
      if(associated(BS_E_SOC_corr)) call BSS_resize(BS_E_SOC_corr)
      !
-     if(associated(BS_R_left) )    call BSS_resize(BS_R_left)
-     if(associated(BS_R_right))    call BSS_resize(BS_R_right)
+     if(associated(BS_R_left_abs) )  call BSS_resize(BS_R_left_abs)
+     if(associated(BS_R_right_abs))  call BSS_resize(BS_R_right_abs)
      !
+     if(associated(BS_R_left_kerr) ) call BSS_resize(BS_R_left_kerr)
      if(associated(BS_R_right_kerr)) call BSS_resize(BS_R_right_kerr)
+     if(associated(BS_R_right_dich)) call BSS_resize(BS_R_right_dich)
      if(associated(BS_R_left_magn) ) call BSS_resize(BS_R_left_magn)
      if(associated(BS_R_right_magn)) call BSS_resize(BS_R_right_magn)
      if(associated(BS_R_PL)        ) call BSS_resize(BS_R_PL)
diff --git a/src/bse/K_diago_left_residuals.F b/src/bse/K_diago_left_residuals.F
new file mode 100644
index 0000000000..7114d920e4
--- /dev/null
+++ b/src/bse/K_diago_left_residuals.F
@@ -0,0 +1,113 @@
+!
+! License-Identifier: GPL
+!
+! Copyright (C) 2006 The Yambo Team
+!
+! Authors (see AUTHORS file for details): AM MG DS
+!
+subroutine K_diago_left_residuals(mode,i_BS_mat,BS_E,BS_R_left,BS_V_left,BS_overlap)
+ !
+ !  The left eigenvector u(j) of A satisfies
+ !                u(j)**H * A = lambda(j) * u(j)**H
+ !  where u(j)**H denotes the conjugate transpose of u(j).
+ !
+ !  Remember: 1/(w-H)= \sum_ij |i right><i left|j right>^{-1) <j left| /(w-E_i)
+ !
+ use pars,           ONLY:SP,cZERO,cI
+ use wrapper_omp,    ONLY:Vstar_dot_V_omp,V_dot_V_omp
+ use parallel_m,     ONLY:PP_indexes,myid,PP_indexes_reset
+ use parallel_int,   ONLY:PP_redux_wait,PARALLEL_index 
+ use LIVE_t,         ONLY:live_timing
+ use X_m,            ONLY:global_gauge
+ use BS,             ONLY:BS_H_dim,BS_K_dim,BS_K_coupling
+ use BS_solvers,     ONLY:BSS_dipoles_opt,BSS_eh_E,BSS_eh_Z,BSS_eh_f,BSS_n_eig
+ use MAGNONS,        ONLY:BSS_dipoles_magn
+ use DICHROISM,      ONLY:BSS_dipoles_dich
+ !
+ implicit none
+ !
+ character(*),intent(in)  :: mode
+ integer,     intent(in)  :: i_BS_mat
+ complex(SP), intent(in)  :: BS_E(BSS_n_eig)
+ complex(SP),target,  intent(out) :: BS_R_left(BSS_n_eig)
+ complex(SP),         intent(in)  :: BS_V_left(:,:)
+ complex(SP),optional,intent(in)  :: BS_overlap(BSS_n_eig,BSS_n_eig)
+ !
+ ! Workspace
+ !
+ type(PP_indexes)     :: px
+ integer              :: i_lambda,n_res,res_range(2)
+ logical              :: velocity_correction
+ character(13)        :: scheme
+ !
+ complex(SP),allocatable :: tmp_res(:)
+ complex(SP),pointer     :: BS_R_tmp(:) => null()
+ !
+ call PP_indexes_reset(px)
+ call PARALLEL_index(px,(/BSS_n_eig/))
+ !
+ if (.not.present(BS_overlap)) scheme="hermitian"
+ if (     present(BS_overlap)) scheme="non-hermitian"
+ !
+ velocity_correction=(index(mode,"opt")>0).and.(trim(global_gauge)=="velocity")
+ !
+ if (.not.BS_K_coupling) res_range=(/1,BS_K_dim(i_BS_mat)/)+BS_K_dim(1)*(i_BS_mat-1)
+ if (     BS_K_coupling) res_range=(/1,BS_H_dim/)
+ !
+ allocate(tmp_res(res_range(1):res_range(2)))
+ n_res=res_range(2)-res_range(1)+1
+ !
+ if (trim(scheme)=="hermitian")     BS_R_tmp => BS_R_left
+ if (trim(scheme)=="non-hermitian") allocate(BS_R_tmp(BSS_n_eig))
+ !
+ ! [1] BS_R_tmp(i)= <i_K|j><j|q0> 
+ !                = conjg( BS_V_left(k,i))*BSS_dipoles(k)*R_k
+ BS_R_tmp=cZERO
+ !
+ call live_timing('EPS L residuals',px%n_of_elements(myid+1))
+ !
+ if(trim(mode)=="opt1") tmp_res=BSS_dipoles_opt(1,res_range(1):res_range(2)) 
+ if(trim(mode)=="opt2") tmp_res=BSS_dipoles_opt(2,res_range(1):res_range(2)) 
+ if(trim(mode)=="dic1") tmp_res=BSS_dipoles_dich(1,res_range(1):res_range(2)) 
+ if(trim(mode)=="dic2") tmp_res=BSS_dipoles_dich(2,res_range(1):res_range(2)) 
+ if(trim(mode)=="mag1") tmp_res=BSS_dipoles_magn(1,res_range(1):res_range(2))
+ if(trim(mode)=="mag2") tmp_res=BSS_dipoles_magn(2,res_range(1):res_range(2))
+ !
+ tmp_res = tmp_res*sqrt(cmplx(BSS_eh_f(res_range(1):res_range(2)),kind=SP)) 
+ if (allocated(BSS_eh_Z)) tmp_res=tmp_res*sqrt(BSS_eh_Z(res_range(1):res_range(2)))
+ if (velocity_correction) tmp_res=tmp_res/BSS_eh_E(res_range(1):res_range(2))
+ !
+ do i_lambda=1,BSS_n_eig
+   !
+   if (.not.px%element_1D(i_lambda)) cycle
+   !
+   BS_R_tmp(i_lambda)=Vstar_dot_V_omp(n_res,BS_V_left(:,i_lambda),tmp_res)
+   if (velocity_correction) BS_R_tmp(i_lambda)=BS_R_tmp(i_lambda)*BS_E(i_lambda)
+   !
+   call live_timing(steps=1)
+   !
+ enddo
+ call PP_redux_wait(BS_R_tmp)
+ !
+ call live_timing()
+ !
+ if (trim(scheme)=="hermitian")  return
+ !
+ ! [2] BS_R_left(i)=BS_overlap(i,j)BS_R_tmp(j)
+ !
+ BS_R_left=cZERO
+ !
+ call live_timing('EPS L x overlap',px%n_of_elements(myid+1))
+ do i_lambda=1,BSS_n_eig
+   if (.not.px%element_1D(i_lambda)) cycle
+   !
+   BS_R_left(i_lambda)=V_dot_V_omp(BSS_n_eig,BS_overlap(i_lambda,:),BS_R_tmp)
+   !
+   call live_timing(steps=1)
+ enddo
+ call PP_redux_wait(BS_R_left)
+ call live_timing
+ !
+ call PP_indexes_reset(px)
+ !
+end subroutine K_diago_left_residuals
diff --git a/src/bse/K_diago_overlap_matrix.F b/src/bse/K_diago_overlap_matrix.F
new file mode 100644
index 0000000000..d697371978
--- /dev/null
+++ b/src/bse/K_diago_overlap_matrix.F
@@ -0,0 +1,51 @@
+!
+!        Copyright (C) 2000-2020 the YAMBO team
+!              http://www.yambo-code.org
+!
+! Authors (see AUTHORS file for details): AM, MG, DS
+! 
+! This file is distributed under the terms of the GNU 
+! General Public License. You can redistribute it and/or 
+! modify it under the terms of the GNU General Public 
+! License as published by the Free Software Foundation; 
+! either version 2, or (at your option) any later version.
+!
+! This program is distributed in the hope that it will 
+! be useful, but WITHOUT ANY WARRANTY; without even the 
+! implied warranty of MERCHANTABILITY or FITNESS FOR A 
+! PARTICULAR PURPOSE.  See the GNU General Public License 
+! for more details.
+!
+! You should have received a copy of the GNU General Public 
+! License along with this program; if not, write to the Free 
+! Software Foundation, Inc., 59 Temple Place - Suite 330,Boston, 
+! MA 02111-1307, USA or visit http://www.gnu.org/copyleft/gpl.txt.
+!
+subroutine K_diago_overlap_matrix(BS_V_left,BS_V_right,BS_overlap)
+ !
+ use pars,           ONLY:SP,cZERO,cONE
+ use wrapper_omp,    ONLY:M_by_M_omp
+ use interfaces,     ONLY:LINEAR_ALGEBRA_driver 
+ use LIVE_t,         ONLY:live_timing
+ use linear_algebra, ONLY:INV
+ use BS,             ONLY:BS_H_dim
+ use BS_solvers,     ONLY:BSS_n_eig
+ !
+ implicit none
+ !
+ complex(SP),intent(in)  :: BS_V_left(BS_H_dim,BSS_n_eig),BS_V_right(BS_H_dim,BSS_n_eig)
+ complex(SP),intent(out) :: BS_overlap(BSS_n_eig,BSS_n_eig)
+ !
+ ! BS_overlap(i,j)=sum_k conjg(BS_V_left(k,i))*BS_V_right(k,j)
+ !
+ BS_overlap=cZERO
+ !
+ call live_timing('BSK overlap mat',1)
+ !
+ call M_by_M_omp('c','n',BSS_n_eig,BSS_n_eig,BS_H_dim,cONE,BS_V_left,BS_H_dim,BS_V_right,BS_H_dim,cZERO,BS_overlap,BSS_n_eig)
+ !
+ call LINEAR_ALGEBRA_driver(INV,M=BS_overlap)
+ call live_timing(steps=1)
+ call live_timing()
+ !
+end subroutine K_diago_overlap_matrix
diff --git a/src/bse/K_diago_response_functions.F b/src/bse/K_diago_response_functions.F
index aa32c2836c..b12c9fcba2 100644
--- a/src/bse/K_diago_response_functions.F
+++ b/src/bse/K_diago_response_functions.F
@@ -5,12 +5,13 @@
 !
 ! Authors (see AUTHORS file for details): DS AM MG
 !
-subroutine K_diago_response_functions(iq,i_BS_mat,W,BS_E,BS_R_left,BS_R_right,BS_E_SOC_corr,&
- &                                  BS_R_left_magn,BS_R_right_magn,BS_R_right_kerr,BS_R_PL)
+subroutine K_diago_response_functions(iq,i_BS_mat,W,BS_E,BS_R_left_abs,BS_R_right_abs,BS_E_SOC_corr,&
+ &                    BS_R_left_magn,BS_R_right_magn,BS_R_left_kerr,BS_R_right_kerr,BS_R_right_dich,BS_R_PL)
  !
  ! eps2(iw)= 1 - Sum  [BS_R_left(i)*BS_R_right(i)] / [w+i*eta - E(i)]  
  !
- use pars,           ONLY:cZERO,cONE,SP,pi
+ use pars,           ONLY:cZERO,cONE,cI,SP,pi
+ use units,          ONLY:SPEED_OF_LIGHT
  use X_m,            ONLY:Resp_ii,Joint_DOS,BS_E_sorted,&
 &                         global_gauge,i_G_shift
  use electrons,      ONLY:n_sp_pol,n_spinor
@@ -21,23 +22,23 @@ subroutine K_diago_response_functions(iq,i_BS_mat,W,BS_E,BS_R_left,BS_R_right,BS
  use BS,             ONLY:BS_perturbative_SOC,BS_res_ares_n_mat,l_BS_abs,l_BS_jdos,l_BS_esort,&
 &                         BS_K_anti_resonant,l_BS_ares_from_res,BS_K_coupling
  use BS_solvers,     ONLY:BSS_n_eig,Co_factor
- use R_lattice,      ONLY:bare_qpg
+ use R_lattice,      ONLY:bare_qpg,q0_def_norm
  use units,          ONLY:HA2EV
  use vec_operate,    ONLY:degeneration_finder,sort
  use BS_solvers,     ONLY:B_Hall,para_term_w0
- use X_m,            ONLY:Resp_ij,X_magnons
+ use X_m,            ONLY:Resp_ij,X_magnons,X_dichroism
  use PHOTOLUM,       ONLY:PL,PL_prefactor
- use BS,             ONLY:l_BS_kerr,l_BS_magnons,l_BS_photolum,l_BS_dichroism
+ use BS,             ONLY:l_BS_kerr,l_BS_kerr_asymm,l_BS_magnons,l_BS_photolum,l_BS_dichroism
  !
  implicit none
  !
  type(w_samp),intent(in)  :: W
  integer,     intent(in)  :: iq,i_BS_mat
  complex(SP), intent(in)  :: BS_E(BSS_n_eig)
- complex(SP), pointer, intent(in) :: BS_R_left(:),BS_R_right(:)
+ complex(SP), pointer, intent(in) :: BS_R_left_abs(:),BS_R_right_abs(:)
  real(SP),    pointer, intent(in) :: BS_E_SOC_corr(:,:)
  complex(SP), pointer, intent(in) :: BS_R_left_magn(:,:),BS_R_right_magn(:,:)
- complex(SP), pointer, intent(in) :: BS_R_right_kerr(:)
+ complex(SP), pointer, intent(in) :: BS_R_left_kerr(:),BS_R_right_kerr(:),BS_R_right_dich(:,:)
  real(SP),    pointer, intent(in) :: BS_R_PL(:,:)
  !
  ! Work space
@@ -48,7 +49,7 @@ subroutine K_diago_response_functions(iq,i_BS_mat,W,BS_E,BS_R_left,BS_R_right,BS
  real(SP), allocatable     :: vtmpR(:)
  complex(SP), allocatable  :: BS_E_sorted_tmp(:,:)
  integer, allocatable      :: sort_indx(:),first_exc(:),n_of_exc(:)
- complex(SP)               :: g_fac,res_kerr,res_magn(2)
+ complex(SP)               :: g_fac,res_kerr,res_dich,res_magn(2)
  logical                   :: l_ADD_the_ARES_using_the_RES
  !
  n_SOC=1
@@ -85,10 +86,17 @@ subroutine K_diago_response_functions(iq,i_BS_mat,W,BS_E,BS_R_left,BS_R_right,BS
    if (.not.px%element_1D(i1)) cycle
    !
    if (l_BS_abs) then
-     res_abs=BS_R_left(i1)*BS_R_right(i1)*ares_fac
+     res_abs=BS_R_left_abs(i1)*BS_R_right_abs(i1)*ares_fac
    endif
    if (l_BS_kerr) then
-     res_kerr=BS_R_left(i1)*BS_R_right_kerr(i1)*ares_fac
+     res_kerr=BS_R_left_abs(i1)*BS_R_right_kerr(i1)*ares_fac
+     if(l_BS_kerr_asymm) then
+       res_kerr=(BS_R_left_abs(i1)*BS_R_right_kerr(i1)-BS_R_left_kerr(i1)*BS_R_right_abs(i1))*ares_fac/2._SP
+     endif
+   endif
+   if(l_BS_dichroism) then
+     res_dich=(BS_R_left_abs(i1)*BS_R_right_dich(i1,1)+BS_R_left_kerr(i1)*BS_R_right_dich(i1,2))
+     res_dich=res_dich*ares_fac/q0_def_norm/2._SP
    endif
    if(l_BS_magnons) res_magn(:)=BS_R_left_magn(:,i1)*BS_R_right_magn(:,i1)*ares_fac
    !
@@ -135,6 +143,18 @@ subroutine K_diago_response_functions(iq,i_BS_mat,W,BS_E,BS_R_left,BS_R_right,BS
        endif
        B_Hall(2)=B_Hall(2)+res_kerr*g_fac
      endif
+     !
+     if(l_BS_dichroism) then
+       if(l_BS_esort.and.i_BS_mat==1) then
+         BS_E_sorted_tmp(i1,nVAR+1:nVAR+1)=res_dich
+         nVAR=nVAR+1
+       endif
+       X_dichroism(:,2)=X_dichroism(:,2)-res_dich/(W%p(:)-EXC_E(i_pert_SOC))
+       if (l_ADD_the_ARES_using_the_RES) then
+         X_dichroism(:,2)=X_dichroism(:,2)+conjg(res_dich)/(W%p(:)+conjg(EXC_E(i_pert_SOC)))
+       endif
+     endif
+     !
      if(l_BS_magnons) then
        if(l_BS_esort.and.i_BS_mat==1) then
          BS_E_sorted_tmp(i1,nVAR+1:nVAR+n_spinor)=res_magn(:n_spinor)
@@ -147,6 +167,7 @@ subroutine K_diago_response_functions(iq,i_BS_mat,W,BS_E,BS_R_left,BS_R_right,BS
          X_magnons(:,2,2)=X_magnons(:,2,2)+conjg(res_magn(1))/(W%p(:)+conjg(EXC_E(i_pert_SOC)))
        endif
      endif
+     !
      if(l_BS_photolum) then
        PL(:,2)=PL(:,2)+PL_prefactor*ares_fac*BS_R_PL(1,i1)*aimag(-1._SP/(W%p(:)-EXC_E(i_pert_SOC)))/pi
        if (l_ADD_the_ARES_using_the_RES) then
@@ -213,6 +234,10 @@ subroutine K_diago_response_functions(iq,i_BS_mat,W,BS_E,BS_R_left,BS_R_right,BS
    call PP_redux_wait(B_Hall(2))
  endif
  !
+ if(l_BS_dichroism) then
+   call PP_redux_wait(X_dichroism(:,2))
+ endif
+ !
  if(l_BS_magnons) then
    call PP_redux_wait(X_magnons(:,:,2))
  endif
@@ -233,6 +258,9 @@ subroutine K_diago_response_functions(iq,i_BS_mat,W,BS_E,BS_R_left,BS_R_right,BS
    if (BS_K_anti_resonant.and..not.BS_K_coupling) B_Hall(2)=B_Hall(2)-conjg(B_Hall(2))
  endif
  !
+ ! Factors to be fixed
+ if (l_BS_dichroism) X_dichroism(:,2)=X_dichroism(:,2)*(cI/SPEED_OF_LIGHT)*Co_factor/(4._SP*pi)
+ !
  if (l_BS_magnons) X_magnons(:,:,2)=X_magnons(:,:,2)*Co_factor/(4._SP*pi)
  !
 end subroutine K_diago_response_functions
diff --git a/src/bse/K_diago_right_residuals.F b/src/bse/K_diago_right_residuals.F
new file mode 100644
index 0000000000..5f97a86f71
--- /dev/null
+++ b/src/bse/K_diago_right_residuals.F
@@ -0,0 +1,79 @@
+!
+! License-Identifier: GPL
+!
+! Copyright (C) 2022 The Yambo Team
+!
+! Authors (see AUTHORS file for details): DS
+!
+subroutine K_diago_right_residuals(mode,i_BS_mat,BS_E,BS_R_right,BS_V_right)
+ !
+ ! BS_R_right_XX(i)=<q0_XX|j><j|i_R>
+ !                 =conjg(BSS_dipoles(j))*BS_V_right(j,i)
+ !
+ use pars,           ONLY:SP,cZERO,cI
+ use wrapper_omp,    ONLY:V_dot_V_omp
+ use parallel_m,     ONLY:PP_indexes,myid,PP_indexes_reset
+ use parallel_int,   ONLY:PP_redux_wait,PARALLEL_index 
+ use LIVE_t,         ONLY:live_timing
+ use X_m,            ONLY:global_gauge
+ use BS_solvers,     ONLY:BSS_eh_E,BSS_eh_Z,BSS_eh_f,BSS_n_eig,BSS_dipoles_opt
+ use BS,             ONLY:BS_K_dim,BS_H_dim,BS_K_coupling
+ use MAGNONS,        ONLY:BSS_dipoles_magn
+ use DICHROISM,      ONLY:BSS_dipoles_dich
+ !
+ implicit none
+ !
+ character(*),intent(in)  :: mode
+ integer,     intent(in)  :: i_BS_mat
+ complex(SP), intent(in)  :: BS_E(BSS_n_eig)
+ complex(SP), intent(out) :: BS_R_right(BSS_n_eig)
+ complex(SP), intent(in)  :: BS_V_right(:,:)
+ !
+ ! Workspace
+ !
+ type(PP_indexes)     ::px
+ integer              ::i_lambda,n_res,res_range(2)
+ logical              ::velocity_correction
+ !
+ complex(SP),allocatable :: tmp_res(:)
+ !
+ call PP_indexes_reset(px)
+ call PARALLEL_index(px,(/BSS_n_eig/))
+ !
+ BS_R_right=cZERO
+ velocity_correction=(index(mode,"opt")>0).and.(trim(global_gauge)=="velocity")
+ !
+ if (.not.BS_K_coupling) res_range=(/1,BS_K_dim(i_BS_mat)/)+BS_K_dim(1)*(i_BS_mat-1)
+ if (     BS_K_coupling) res_range=(/1,BS_H_dim/)
+ !
+ allocate(tmp_res(res_range(1):res_range(2)))
+ n_res=res_range(2)-res_range(1)+1
+ !
+ call live_timing(trim(mode)//' residuals ',px%n_of_elements(myid+1))
+ !
+ if(trim(mode)=="opt1") tmp_res=BSS_dipoles_opt(1,res_range(1):res_range(2))
+ if(trim(mode)=="opt2") tmp_res=BSS_dipoles_opt(2,res_range(1):res_range(2))
+ if(trim(mode)=="dic1") tmp_res=BSS_dipoles_dich(1,res_range(1):res_range(2)) 
+ if(trim(mode)=="dic2") tmp_res=BSS_dipoles_dich(2,res_range(1):res_range(2)) 
+ if(trim(mode)=="mag1") tmp_res=BSS_dipoles_magn(1,res_range(1):res_range(2))
+ if(trim(mode)=="mag2") tmp_res=BSS_dipoles_magn(2,res_range(1):res_range(2))
+ !
+ tmp_res = conjg(tmp_res)*sqrt(cmplx(BSS_eh_f(res_range(1):res_range(2)),kind=SP)) 
+ if (allocated(BSS_eh_Z)) tmp_res=tmp_res*sqrt(BSS_eh_Z(res_range(1):res_range(2)))
+ if (velocity_correction) tmp_res=tmp_res/BSS_eh_E(res_range(1):res_range(2))
+ !
+ do i_lambda=1,BSS_n_eig
+   if (.not.px%element_1D(i_lambda)) cycle
+   !
+   BS_R_right(i_lambda)=V_dot_V_omp(n_res,tmp_res,BS_V_right(:,i_lambda))
+   if (velocity_correction) BS_R_right(i_lambda)=BS_R_right(i_lambda)*BS_E(i_lambda)
+   !
+   call live_timing(steps=1)
+ enddo
+ call PP_redux_wait(BS_R_right)
+ call live_timing
+ !
+ call PP_indexes_reset(px)
+ deallocate(tmp_res)
+ !
+end subroutine K_diago_right_residuals
diff --git a/src/bse/K_dipoles.F b/src/bse/K_dipoles.F
index 6eebdb43fb..0e47c08552 100644
--- a/src/bse/K_dipoles.F
+++ b/src/bse/K_dipoles.F
@@ -13,13 +13,14 @@ subroutine K_dipoles(iq,Ken,Xk,q,X_oscillators,Dip)
  use vec_operate,   ONLY:v_norm
  use R_lattice,     ONLY:bz_samp,q0_def_norm,qindx_X
  use stderr,        ONLY:intc
+ use com,           ONLY:msg
  use wave_func,     ONLY:WF
  use stderr,        ONLY:STRING_match
  use interfaces,    ONLY:WF_load,WF_free
  use electrons,     ONLY:levels,spin
  use X_m,           ONLY:X_t,global_gauge,i_G_shift
  use DIPOLES,       ONLY:DIPOLE_t,DIP_alloc,DIP_rotated
- use BS_solvers,    ONLY:BSS_Vnl_included
+ use BS_solvers,    ONLY:BSS_Vnl_included,BSS_Q_dir,BSS_E_dir,BSS_P_dir
  use BS,            ONLY:BS_T_grp,BS_K_coupling,BS_nT_grps,BSE_L_kind,BS_bands,&
 &                        BS_dip_size,l_BS_trace,BS_n_eh_spaces,&
 &                        l_BS_abs,l_BS_kerr,l_BS_magnons,l_BS_dichroism,l_BS_photolum,&
@@ -29,7 +30,7 @@ subroutine K_dipoles(iq,Ken,Xk,q,X_oscillators,Dip)
 &                        PAR_COM_Xk_ibz_INDEX
  use collision_el,  ONLY:elemental_collision,elemental_collision_free,elemental_collision_alloc
  use timing_m,      ONLY:timing
- use vec_operate,   ONLY:v_rotate
+ use vec_operate,   ONLY:v_rotate,cross_product
  !
 #include<memory.h>
  !
@@ -43,6 +44,7 @@ subroutine K_dipoles(iq,Ken,Xk,q,X_oscillators,Dip)
  !
  ! Work space
  !
+ logical           ::geometry_ok
  integer           ::ik_bz,ok_bz,ik,ok,isymm,osymm,iGo,iv,ic,i_sp_pol_c,i_sp_pol_v,i_T_g,i_T,&
 &                    io_err,i_res_ares,i_dip
  complex(SP)       ::DIP_Splus,DIP_Smins,DIP_projected(BS_dip_size),DIP_expanded(3)
@@ -78,18 +80,35 @@ subroutine K_dipoles(iq,Ken,Xk,q,X_oscillators,Dip)
  !
  if (iq==1.and.i_G_shift==1) then
    !
-   ! Directions for the external field
+   BSS_P_dir=cross_product(BSS_Q_dir,BSS_E_dir)
+   !
+   if (trim(BSE_dipole_geometry)/='none'.and.l_BS_kerr) then
+     BSS_P_dir=0._SP
+     call v_rotate(trim(BSE_dipole_geometry),-pi/2._SP,BSS_E_dir,v_out=BSS_P_dir)
+     geometry_ok=abs(v_norm(BSS_P_dir)-1._SP)<1.E-5_SP .and. &
+     &  abs(dot_product(BSS_E_dir,BSS_P_dir))<1.E-5_SP
+     if ( .not. geometry_ok) call error(" Field direction and BSEdips do not agree")
+     call msg("r"," BSEdips set in input. This imposes the propagation direction")
+     BSS_Q_dir=-cross_product(BSS_E_dir,BSS_P_dir)
+   endif
    !
-   BS_field_direction(:,1)=Dip%q0/v_norm(Dip%q0)
+   geometry_ok=abs(dot_product(BSS_E_dir,BSS_Q_dir))<1.E-5_SP
    !
-   if (l_BS_trace) then
-     BS_field_direction(:,1)=(/1._SP,0._SP,0._SP/)
-     BS_field_direction(:,2)=(/0._SP,1._SP,0._SP/)
-     BS_field_direction(:,3)=(/0._SP,0._SP,1._SP/)
-   else if (trim(BSE_dipole_geometry)/='none'.and.l_BS_kerr) then
-     call v_rotate(trim(BSE_dipole_geometry),-pi/2._SP,BS_field_direction(:,1),v_out=BS_field_direction(:,2))
+   if ( l_BS_trace .and. .not. geometry_ok ) then
+     call warning(" Propagation direction not ortogonal to field polarization")
+     call warning(" Setting z for propagation and x-y for polarization")
+     BSS_E_dir=(/1._SP,0._SP,0._SP/)
+     BSS_P_dir=(/0._SP,1._SP,0._SP/)
+     BSS_Q_dir=(/0._SP,0._SP,1._SP/)
+     if (trim(BSE_dipole_geometry)/='none'.and.l_BS_kerr) BSE_dipole_geometry="xy"
    endif
    !
+   ! Directions for the external field
+   !
+   BS_field_direction(:,1)=BSS_E_dir  ! external field
+   BS_field_direction(:,2)=BSS_P_dir  ! induced polarization
+   BS_field_direction(:,3)=BSS_Q_dir  ! field propagation direction
+   !
  endif
  !
  call live_timing('BSE dipoles',PAR_BS_nT_col_grps)
diff --git a/src/bse/K_driver.F b/src/bse/K_driver.F
index c9f84219b7..919c984d78 100644
--- a/src/bse/K_driver.F
+++ b/src/bse/K_driver.F
@@ -16,15 +16,17 @@ subroutine K_driver(Xen,Ken,Xk,q,X_static_pp,Xw,Dip)
  use DIPOLES,        ONLY:DIPOLE_t
  use IO_int,         ONLY:io_control
  use IO_m,           ONLY:OP_RD_CL,REP
- use BS_solvers,     ONLY:BSS_n_freqs,BSS_er,BSS_dr,BSS_mode,BSS_damp_reference,BSS_q0,BSS_uses_DbGd
+ use BS_solvers,     ONLY:BSS_n_freqs,BSS_er,BSS_dr,BSS_mode,BSS_damp_reference,BSS_q0,BSS_uses_DbGd,&
+&                         BSS_Q_dir,BSS_E_dir
  use BS,             ONLY:BS_bands,BS_q,BS_n_g_exch,BS_res_K_corr,BS_n_g_W,BS_cpl_K_corr,&
-&                         BS_Blocks_and_Transitions_free,BSqpts
+&                         BS_Blocks_and_Transitions_free,BSqpts,O_ng
  use TDDFT,          ONLY:FXC_per_memstps,FXC_n_mem_freqs,FXC_is_retarded
  use drivers,        ONLY:l_rpa_IP,l_bs_fxc,l_bss
  use parallel_m,     ONLY:PAR_IND_WF_b_and_k,PAR_K_scheme,PARALLEL_default_mode
  use parallel_int,   ONLY:PP_wait,PP_redux_wait,PARALLEL_global_indexes,PARALLEL_WF_distribute,PARALLEL_WF_index
  use interfaces,     ONLY:eval_G_minus_G
- use R_lattice,      ONLY:bz_samp,qindx_B,qindx_X,qindx_B_max,qindx_free
+ use vec_operate,    ONLY:v_norm
+ use R_lattice,      ONLY:bz_samp,qindx_B,qindx_X,qindx_B_max,qindx_free,G_m_G_maxval,q0_def_norm
  !
 #include<memory.h>
  !
@@ -37,7 +39,7 @@ subroutine K_driver(Xen,Ken,Xk,q,X_static_pp,Xw,Dip)
  ! Work Space
  !
  type(X_t)         :: X_tddft,X_oscillators
- integer           :: iq,Max_G_m_G,iG_BSE,iGo_BSE
+ integer           :: iq,Max_G_m_G,iG_BSE,iGo_BSE,VB(2),CB(2)
  type(w_samp)      :: W_bss 
  !
  ! I/O
@@ -57,6 +59,12 @@ subroutine K_driver(Xen,Ken,Xk,q,X_static_pp,Xw,Dip)
  !=========================
  call K_driver_init("init",1,Ken,Xk)
  !
+ ! q0 renormalization and field direction
+ !
+ BSS_E_dir(:) = BSS_E_dir(:)/v_norm(BSS_E_dir)
+ BSS_Q_dir(:) = BSS_Q_dir(:)/v_norm(BSS_Q_dir)
+ BSS_q0(:) = BSS_E_dir(:)*q0_def_norm
+ !
  ! Energy points
  !===============
  call W_reset(W_bss)
@@ -143,25 +151,14 @@ subroutine K_driver(Xen,Ken,Xk,q,X_static_pp,Xw,Dip)
      ! W/X RL size setup looking at the max index of G-G' 
      ! =========
      !
-     iGo_BSE=qindx_B_max
      iG_BSE =BS_n_g_W
-     if (.not.iq==1) then
-       iGo_BSE=maxval((/4*qindx_B_max,qindx_X(iq,:,2)/))
-       iG_BSE =max(BS_n_g_exch,BS_n_g_W)
-       !iGo_BSE=4*maxval((/4*qindx_B_max,qindx_X(iq,:,2)/))
-       !iG_BSE =maxval((/BS_n_g_exch,BS_n_g_W,qindx_B_max/))
-     endif
+     iGo_BSE=qindx_B_max
+     ! This needs be changed in case i_gW is redefined vi G_m_G
+     !if (.not.iq==1) iGo_BSE=maxval((/4*qindx_B_max,qindx_X(iq,:,2)/))
      !
+     ! This is the size of the first dimension of the G_m_G table
      Max_G_m_G=eval_G_minus_G(iG_BSE,iGo_BSE)
-     !
-     if (.not.iq==1) then
-       if (BS_n_g_exch> Max_G_m_G ) then
-         call warning (' Exch size reduced from '//trim(intc(BS_n_g_exch))//' to '&
-&                        //trim(intc(Max_G_m_G))//' due to symmetry constraints')
-         BS_n_g_exch= Max_G_m_G
-         call msg('s','[BSE-exch] Exch size forced by symmetry to be ',BS_n_g_exch)
-       endif
-     endif
+     O_ng=G_m_G_maxval
      !
      if (BS_n_g_W> Max_G_m_G ) then
        call warning (' W size reduced from '//trim(intc(BS_n_g_W))//' to '&
@@ -175,20 +172,20 @@ subroutine K_driver(Xen,Ken,Xk,q,X_static_pp,Xw,Dip)
    call DIPOLE_dimensions(Ken,Dip,BS_bands,BSS_q0)
    !
    ! ... EH space (no parallelization)
-   call K_Transitions_setup("K",iq,Ken,Xk,Dip)
+   call K_Transitions_dimensions(iq,Ken,Xk,Dip,VB,CB)
    !
    !... Parallel distribution
    call PARALLEL_global_Response_T_transitions(Xk)
    !
    ! ... EH space (with parallelization) and Transition groups build-up
-   call K_Transitions_setup("T",iq,Ken,Xk,Dip)
+   call K_Transitions_setup(iq,Ken,Xk,Dip,VB,CB)
    !
    !... Blocks build-up
    if (.not.l_rpa_IP) call K_blocks()
    !
    ! Wave Functions distribution 
    !=============================
-   call PARALLEL_WF_distribute(B_and_K_index=PAR_IND_WF_b_and_k,CLEAN_UP=.FALSE.)
+   call PARALLEL_WF_distribute(B_and_K_index=PAR_IND_WF_b_and_k,CLEAN_UP=iq==BS_q(1))
    call PARALLEL_WF_index( )
    !
    ! 0. The Dipoles 
@@ -201,7 +198,7 @@ subroutine K_driver(Xen,Ken,Xk,q,X_static_pp,Xw,Dip)
    !
    ! 2. The KERNEL
    ! ================
-   if (.not.l_rpa_IP) call K(iq,Ken,Xk,q,X_static_pp,Xw,W_bss)
+   if (.not.l_rpa_IP) call K_kernel(iq,Ken,Xk,q,X_static_pp,Xw,W_bss)
    !
    ! 3. The SOLVER
    ! =================
diff --git a/src/bse/K_driver_init.F b/src/bse/K_driver_init.F
index a8d58a104b..f92637f164 100644
--- a/src/bse/K_driver_init.F
+++ b/src/bse/K_driver_init.F
@@ -8,7 +8,7 @@
 subroutine K_driver_init(what,iq,Ken,Xk)
  !
  use R_lattice,    ONLY:bz_samp
- use electrons,    ONLY:levels,n_sp_pol,n_spinor
+ use electrons,    ONLY:levels,n_sp_pol,n_spinor,n_spin
  use parser_m,     ONLY:parser
  use parallel_m,   ONLY:PARALLEL_default_mode
  use stderr,       ONLY:STRING_match
@@ -19,12 +19,15 @@ subroutine K_driver_init(what,iq,Ken,Xk)
 &                       BS_perturbative_SOC,l_BS_magnons,l_BS_photolum,&
 &                       BS_cpl_K_exchange,BS_n_g_exch,BS_res_K_exchange,BS_K_coupling,BS_res_ares_n_mat,&
 &                       BS_n_eh_spaces,l_BS_ares_from_res,BS_bands,BS_K_anti_resonant
- use TDDFT,        ONLY:FXC_is_retarded,FXC_n_g_corr
+ use TDDFT,        ONLY:FXC_is_retarded,FXC_n_g_corr,l_Fxc_from_Vxc
  use drivers,      ONLY:l_col_cut,l_rpa_IP,l_alda_fxc,l_bs_fxc
  use D_lattice,    ONLY:i_time_rev,i_space_inv,l_3D
 #if defined _RT
  use drivers,      ONLY:l_rt_carriers_in_use
  use RT_control,   ONLY:NEQ_Kernel,EQ_Transitions,EQ_NoOcc,NEQ_Residuals,RT_BSE_Occ_Mode
+#endif
+#if defined _CUDA
+ use TDDFT,        ONLY:FXC_mode
 #endif
  !
  implicit none
@@ -36,7 +39,7 @@ subroutine K_driver_init(what,iq,Ken,Xk)
  !
  ! Work Space
  !
- logical :: l_flag,l_slepc_special
+ logical :: l_flag,l_slepc_special,l_Fxc_Libxc
  !
  ! Defaults
  !==========
@@ -143,7 +146,7 @@ subroutine K_driver_init(what,iq,Ken,Xk)
  if (iq/=1.and.i_time_rev==0.and.i_space_inv==0 )  l_BS_ares_from_res=.FALSE.
  if (l_BS_photolum)                                l_BS_ares_from_res=.FALSE.
  if (l_BS_magnons.and.n_sp_pol==2)                 l_BS_ares_from_res=.FALSE.
- ! The next line is to switch of section of the codes due to the calculation
+ ! The next line is to switch off sections of the code due to the calculation
  ! of the ARES part without checking the BSE_mode (see io_BS_PAR_init for example)
  if (trim(BSE_mode)=="resonant")                   l_BS_ares_from_res=.TRUE.
  !
@@ -162,6 +165,16 @@ subroutine K_driver_init(what,iq,Ken,Xk)
  & call error('set Lkind = DEFAULT or BAR or FULL')
  !
  BS_K_is_ALDA=l_alda_fxc
+ if (l_alda_fxc) then
+   call parser('FxcLibxc',l_Fxc_Libxc)
+   l_Fxc_from_Vxc=(n_spin==1).and..not.l_Fxc_Libxc
+ endif
+#if defined _CUDA
+ if ( BS_K_is_alda.and. index(FXC_mode,"G-")>0 ) then
+   call warning(" Tddft with G-integrals is not GPU ported. Fallback to R-integrals")
+   FXC_mode="R-def"
+ endif
+#endif
  BS_dip_size=1
  !
  ! Memory saving
diff --git a/src/bse/K_exchange_collisions.F b/src/bse/K_exchange_collisions.F
index 3933c53ba8..c24cc5c5dd 100644
--- a/src/bse/K_exchange_collisions.F
+++ b/src/bse/K_exchange_collisions.F
@@ -5,12 +5,14 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
-subroutine K_exchange_collisions(iq,Xk,i_T_grp)
+subroutine K_exchange_collisions(iq,Xk,i_T_grp,NG,l_bs_exch_wf_in_loop)
  !
  use pars,           ONLY:SP,cZERO
  use D_lattice,      ONLY:nsym,i_time_rev,sop_inv,sop_inv_d
- use BS,             ONLY:BS_T_grp,K_EXCH_collision,BS_oscillators_alloc,BS_n_g_exch,&
-&                         BSE_L_kind,l_BS_magnons
+ use BS,             ONLY:BS_T_grp,K_EXCH_collision,BS_n_g_exch,&
+&                         BSE_L_kind,l_BS_magnons,l_BSE_minimize_memory,BS_bands
+ use interfaces,     ONLY:WF_load,WF_free
+ use wave_func,      ONLY:WF
  use R_lattice,      ONLY:g_rot,g_rot_d,qindx_X,bz_samp
  use stderr,         ONLY:STRING_match
  use electrons,      ONLY:n_sp_pol
@@ -24,34 +26,30 @@ subroutine K_exchange_collisions(iq,Xk,i_T_grp)
  implicit none
  !
  type(bz_samp),intent(in) :: Xk
- integer,      intent(in) :: iq,i_T_grp
+ integer,      intent(in) :: iq,i_T_grp,NG(2)
+ logical,      intent(in) :: l_bs_exch_wf_in_loop
  !
  ! Work Space
  !
+ logical :: l_load_WFs
+ integer :: NK(2)
  integer :: i_T_el,i_T_el_p,N_T_el_p,i_c,i_v,i_sp_c,i_sp_v,i_k_bz,i_k,i_s,i_g0,i_g1,i_g2,i_p_bz,i_g_p,i_p,&
 &           i_sp,i_T_el_last_with_identity_sym,i_T_grp_last_with_identity_sym,i_T_grp_p
 #ifdef _CUDA
   complex(SP), pointer, device :: O_x_d(:,:),O_x_sym_d(:,:)
 #endif
- !
- ! Oscillators already done ?
- !
- if (allocated( BS_T_grp(i_T_grp)%O_x )) return
- !
- if (l_BS_magnons.and.n_sp_pol==2) return
  !
  call timing('T_space EXX Osc.',OPR='start')
  !
  ! Allocate
  !
  call elemental_collision_free(K_EXCH_collision)
- call BS_oscillators_alloc(.TRUE.,i_T_grp,.FALSE.,0)
  !
 #if defined(__NOTNOW) && ! defined(_CUDA)
  !$omp parallel default(shared), private( K_EXCH_collision, &
  !$omp &        i_T_el,i_k_bz,i_k,i_s, i_p_bz,i_p,i_sp, i_v,i_c,i_sp_c,i_sp_v, &
  !$omp &        i_T_grp_last_with_identity_sym, i_T_el_last_with_identity_sym, &
- !$omp &        i_T_grp_p, N_T_el_p, i_T_el_p, i_g1, i_g2)
+ !$omp &        i_T_grp_p, N_T_el_p, i_T_el_p, i_g1, i_g2, NK)
 #endif
  !
  call OPENMP_update(master_thread)
@@ -61,6 +59,7 @@ subroutine K_exchange_collisions(iq,Xk,i_T_grp)
 #if defined(__NOTNOW) && ! defined(_CUDA)
  !$omp do schedule(dynamic)
 #endif
+ NK=-1
  T_loop: do i_T_el=1,BS_T_grp(i_T_grp)%size
    !
    i_k_bz=BS_T_grp(i_T_grp)%table(i_T_el,1)
@@ -82,6 +81,20 @@ subroutine K_exchange_collisions(iq,Xk,i_T_grp)
    K_EXCH_collision%os=(/i_v,i_p,i_sp,i_sp_v/)
    K_EXCH_collision%qs=(/i_g_p,iq,1/)
    !
+   l_load_WFs= l_bs_exch_wf_in_loop .and. (NK(1)/=min(i_k,i_p).or.NK(2)/=max(i_k,i_p))
+   if (l_load_WFs) then
+#if defined(__NOTNOW) && ! defined(_CUDA)
+     !$omp critical
+#endif
+     if (NK(2)/=-1) call WF_free(WF,keep_fft=.true.,keep_states_to_load=.true.)
+     NK=(/min(i_k,i_p),max(i_k,i_p)/)
+     call WF_load(WF,NG(1),NG(2),BS_bands,NK,k_extrema_only=.true.,quiet=.true.,&
+     &            space='R',title="Kernel exch",keep_states_to_load=.true.)
+#if defined(__NOTNOW) && ! defined(_CUDA)
+     !$omp end critical
+#endif
+   endif
+   !
    if (iq==1) then
      !
      ! G==0 term
@@ -173,6 +186,8 @@ subroutine K_exchange_collisions(iq,Xk,i_T_grp)
 #if defined(__NOTNOW) && ! defined(_CUDA)
  !$omp end parallel
 #endif
+ !
+ if(l_bs_exch_wf_in_loop) call WF_free(WF,keep_fft=.true.,keep_states_to_load=.true.)
  !
  call timing('T_space EXX Osc.',OPR='stop')
  !
diff --git a/src/bse/K.F b/src/bse/K_kernel.F
similarity index 69%
rename from src/bse/K.F
rename to src/bse/K_kernel.F
index 578df49b0a..09e082c441 100644
--- a/src/bse/K.F
+++ b/src/bse/K_kernel.F
@@ -5,47 +5,47 @@
 !
 ! Authors (see AUTHORS file for details): AM DS AF
 !
-subroutine K(iq,Ken,Xk,q,X,Xw,W_bss)
+subroutine K_kernel(iq,Ken,Xk,q,X,Xw,W_bss)
  !
- ! K = <2V-W> for n_spin=1
+ ! K = <2V-W> for n_spin=1; K=<V-W> for n_spin>1
  !
  use pars,          ONLY:SP,schlen,pi,cZERO,cI
- use stderr,        ONLY:intc
  use LOGO,          ONLY:pickup_a_random
  use drivers,       ONLY:l_bs_fxc,l_tddft,l_rim_w
  use frequency,     ONLY:w_samp
- use electrons,     ONLY:levels,spin_occ,spin,n_spin,n_spinor
+ use electrons,     ONLY:levels,spin_occ,spin,n_spin,n_spinor,n_sp_pol
  use FFT_m,         ONLY:fft_size
  use interfaces,    ONLY:WF_load,WF_free,eval_G_minus_G,TDDFT_ALDA_eh_space_R_kernel
  use D_lattice,     ONLY:nsym,DL_vol,i_time_rev,i_space_inv,sop_inv,sop_tab
- use R_lattice,     ONLY:G_m_G,qindx_B,bz_samp,RIM_W,&
-&                        qindx_X,qindx_B_max,qindx_B_load,&
+ use R_lattice,     ONLY:qindx_X,bz_samp,&
+&                        qindx_B,qindx_B_max,qindx_B_load,RIM_W,&
 &                        RIM_ng,RIM_W_is_diagonal,RIM_W_ng
  use com,           ONLY:msg
  use stderr,        ONLY:intc
  use LIVE_t,        ONLY:live_timing
  use X_m,           ONLY:X_t
+ use interfaces,    ONLY:el_density_and_current
  use QP_m,          ONLY:QP_ng_Vxc
  use BS,            ONLY:BS_bands,BS_res_K_corr,BS_W,BS_W_is_diagonal,&
-&                        BS_res_K_exchange,BS_Block_size,&
+&                        BS_res_K_exchange,BS_Block_size,BS_n_g_W,&
 &                        O_ng,BS_n_g_exch,BS_n_g_fxc,BS_identifier,BS_LiveTiming_steps,&
 &                        BS_K_dim,BS_K_is_ALDA,BS_cpl_K_exchange,&
 &                        BS_cpl_K_corr,K_EXCH_collision,K_CORR_collision,&
-&                        BS_oscillators_free,WF_phase,n_BS_blks,BS_blk,BS_T_grp,&
+&                        WF_phase,n_BS_blks,BS_blk,BS_T_grp,&
 &                        BS_nT_grps,BS_blks_free,l_BS_ares_from_res,&
 &                        l_BSE_minimize_memory,l_BSE_restart,l_BSE_kernel_complete,&
-&                        BS_perturbative_SOC,BS_K_cutoff,BS_max_val
+&                        BS_perturbative_SOC,BS_K_cutoff,BS_max_val,l_BS_magnons
  use collision_el,  ONLY:elemental_collision_free
  use IO_int,        ONLY:io_control
  use IO_m,          ONLY:REP,OP_WR,RD,WR_CL,OP_APP,deliver_IO_error_message,WR
  use TDDFT,         ONLY:FXC_K_diagonal,F_xc_gspace,FXC_n_g_corr,io_BS_Fxc, &
-&                        FXC_mode,tddft_wf_t
+&                        FXC_mode,tddft_wf_t,l_Fxc_from_Vxc
  use xc_functionals,ONLY:V_xc,F_xc,F_xc_mat,magn,XC_potential_driver
  use global_XC,     ONLY:WF_xc_functional,WF_kind,WF_exx_fraction
  use wave_func,     ONLY:WF
  use openmp,        ONLY:OPENMP_update,master_thread
  use timing_m,      ONLY:timing
- use parallel_m,    ONLY:myid,ncpu,PAR_K_scheme
+ use parallel_m,    ONLY:myid,ncpu,PAR_K_scheme,PARALLEL_default_mode,master_cpu
  use parallel_int,  ONLY:PP_redux_wait
  use MAGNONS,       ONLY:BSS_MAGN_free
  use PHOTOLUM,      ONLY:BSS_PL_free
@@ -78,20 +78,19 @@ subroutine K(iq,Ken,Xk,q,X,Xw,W_bss)
  integer ::     i_k_bz_last,i_p_bz_last,i_pmq_bz_last,i_kmq_bz_last, & 
 &               iq_W_bz_last,ig_W_last,iq_W_bz_mq_last,ig_W_mq_last
  !
- integer    :: i_block,O_ng_shift,NG,&
+ integer    :: i_block,O_ng_shift,O_ng_exch,O_ng_tddft,iHxc,NG(2),NK(2),&
 &              ig_kmq,ig_pmq,ig_W_mq,iq_W_bz_mq,iq_W_mq,iq_W_s_mq,&
 &              bands_to_load(2),is_k(4),os_k(4),is_p(4),os_p(4)
- complex(SP):: Co,H_x,H_c
+ complex(SP):: Co,H_x,H_c,BS_mat_tmp
  complex(SP):: sqrt_f_itk,sqrt_f_itp
- real(SP), allocatable :: BS_max_val_CPU(:)
+ real(SP), allocatable :: BS_max_val_CPU(:),rho(:)
  !
  complex(SP),   external ::K_correlation_kernel_std,K_correlation_kernel_dir
  complex(SP),   external ::TDDFT_ALDA_eh_space_G_kernel
  complex(SP),   external ::K_exchange_kernel_resonant,K_exchange_kernel_coupling
  !
- ! Oscillators
- !
- logical    :: load_O_X,l_std_alg,l_dir_alg,l_tddft_gsum,l_tddft_rsum
+ logical    :: l_bs_exch,l_bs_corr,l_bs_exch_wf_in_loop,l_bs_tddft_wf_in_loop,l_matrix_init,&
+ &             l_load_kernel,l_write_kernel,l_skip_phases,l_std_alg,l_dir_alg,l_tddft_gsum,l_tddft_rsum
  !
  ! I/O
  !
@@ -149,51 +148,57 @@ subroutine K(iq,Ken,Xk,q,X,Xw,W_bss)
  !
  call section('=',trim(section_title))
  !
- ! Exchange
- !==========
- !
- load_O_X=BS_res_K_exchange.or.l_bs_fxc
+ l_bs_exch=(any((/BS_res_K_exchange,BS_cpl_K_exchange/)).or.l_bs_fxc).and..not.(l_BS_magnons.and.n_sp_pol==2)
+ l_bs_corr=(any((/BS_res_K_corr,BS_cpl_K_corr/)) .and. .not.l_bs_fxc)
  !
- if (load_O_X) call msg('r','[BSE] Exchange components ',BS_n_g_exch)
+ ! Oscillators Setup
+ !===================
  !
- if (iq/=1 .and. n_spinor==2 .and. nsym>1) then
-   ! Default is direct algorithm, unless imposed from input
-   call parser('ImposeStdAlg',l_std_alg)
-   l_dir_alg=.not.l_std_alg
- else         
-   ! Default is standard algorithm, unless imposed from input
-   call parser('ImposeDirAlg',l_dir_alg)
-   l_std_alg=.not.l_dir_alg
+ if (l_bs_exch) then
+   O_ng_exch =maxval(qindx_X(iq,:,2))
+   ! I load the WFs for exachange in a loop in two cases
+   ! - q=0 and more than one k-point.
+   !   In this case it is the default, unless I specify workload in the parallel mode
+   ! - q/=0
+   !   In this case it is activated only if I specify KQmemory in the parallel mode
+   !   The code would work fine also in this case, however it would require continuous loading and unloading of WFs.
+   !   To avoid this one would need to move the loop over i_star in K_Transitions_setup outside the
+   !   loop over ic and iv (see related comment in K_Transiontion_setup.F) 
+   l_bs_exch_wf_in_loop= ((iq==1) .and. (Xk%nbz>=1).and..not.trim(PARALLEL_default_mode)=="workload") .or. &
+   &                     ((iq> 1) .and.                      trim(PARALLEL_default_mode)=="KQmemory")
+   call msg('r','[BSE] Exchange components ',BS_n_g_exch)
  endif
  !
- l_tddft_gsum=.false.
- l_tddft_rsum=.false.
  if (BS_K_is_ALDA) then
+   O_ng_tddft =maxval(qindx_X(iq,:,2))
    l_tddft_gsum=(index(FXC_mode,"G-")>0)
    l_tddft_rsum=(index(FXC_mode,"R-")>0)
    if( .not.(l_tddft_gsum.or.l_tddft_rsum) ) call error("Wrong FXC_mode:"//trim(FXC_mode))
+   if(index(FXC_mode,"cut_GmGp")>0) then
+     FXC_n_g_corr=BS_n_g_fxc
+     if (BS_n_g_fxc/= eval_G_minus_G(BS_n_g_fxc,0) ) then
+       BS_n_g_fxc   = eval_G_minus_G(BS_n_g_fxc,0)
+       call msg('s','[TDDFT] Fxc cutoff is forced by symmetry to be ',BS_n_g_fxc)
+     endif
+   endif
+   l_bs_tddft_wf_in_loop= ((iq==1) .and. (Xk%nbz>=1).and.(.not.trim(PARALLEL_default_mode)=="workload") ) .or. &
+   &                      ((iq> 1) .and.                       trim(PARALLEL_default_mode)=="KQmemory" )
+   call msg('r','[BSE] ALDA        components ',BS_n_g_fxc)
  endif
  !
- ! Oscillators Setup
- !===================
- O_ng=1
- O_ng_shift=maxval(qindx_X(iq,:,2))
- !
- if (any((/BS_res_K_corr,BS_cpl_K_corr/)).and..not.l_bs_fxc) then
-   O_ng=maxval(G_m_G)
-   call fft_check_size(1,O_ng,BS_n_g_exch,"Kx")
-   ! Hybrid functional.
-   ! The cutoff on the non-hybrid part, BS_n_g_fxc, need to be lowered
-   if (BS_K_is_ALDA) call fft_check_size(1,O_ng,BS_n_g_fxc,"Kx")
-   if(iq/=1) O_ng_shift=max(qindx_B_max,O_ng_shift)
- endif
- !
- if(BS_K_is_ALDA.and.index(FXC_mode,"cut_GmGp")>0) then
-   FXC_n_g_corr=BS_n_g_fxc
-   if (BS_n_g_fxc/= eval_G_minus_G(BS_n_g_fxc,0) ) then
-     BS_n_g_fxc   = eval_G_minus_G(BS_n_g_fxc,0)
-     call msg('s','[TDDFT] ALDA Fxc cutoff is forced by symmetry to be ',BS_n_g_fxc)
+ if (l_bs_corr) then
+   O_ng_shift=qindx_B_max
+   if (iq/=1 .and. n_spinor==2 .and. nsym>1) then
+     ! Default is direct algorithm, unless imposed from input
+     call parser('ImposeStdAlg',l_std_alg)
+     l_dir_alg=.not.l_std_alg
+   else
+     ! Default is standard algorithm, unless imposed from input
+     call parser('ImposeDirAlg',l_dir_alg)
+     l_std_alg=.not.l_dir_alg
    endif
+   call parser('SkipPhases',l_skip_phases)
+   call msg('r','[BSE] Correlation components ',(/BS_n_g_W,O_ng/))
  endif
  !
  call K_restart(iq,X,ID_head,ID,ID_compr,l_partial_kernel_loaded)
@@ -201,7 +206,7 @@ subroutine K(iq,Ken,Xk,q,X,Xw,W_bss)
  if( l_BSE_kernel_complete ) return
  !
  io_QINDX_err=0
- if (any((/BS_res_K_corr,BS_cpl_K_corr/)).and..not.l_bs_fxc) io_QINDX_err=qindx_B_init(qindx_ID,qindx_ID_frag)
+ if (l_bs_corr) io_QINDX_err=qindx_B_init(qindx_ID,qindx_ID_frag)
  if(io_QINDX_err/=0) call error('Error reading qindx_B database ')
  !
  ! Screened interaction
@@ -213,7 +218,7 @@ subroutine K(iq,Ken,Xk,q,X,Xw,W_bss)
  if (l_rim_w) call QP_interpolate_W(X,Xw,q,'BSE')
  !
  io_X_err=0
- if (BS_res_K_corr) call K_screened_interaction(X,Xw,q,io_X_err)
+ if (l_bs_corr) call K_screened_interaction(X,Xw,q,io_X_err)
  !
  if (io_X_err<0) then
    call deliver_IO_error_message(io_X_err,'PP/Em1s')
@@ -249,30 +254,66 @@ subroutine K(iq,Ken,Xk,q,X,Xw,W_bss)
    !
  endif
  !
- ! Wave Functions
- !================
- bands_to_load=BS_bands
- if (BS_K_is_ALDA) bands_to_load=(/1,BS_bands(2)/)
+ ! DB identifier 
+ !===============
+ BS_identifier=pickup_a_random(10000._SP)
+ !
+ ! BS DB description section I/O
+ !===============================
  !
- section_title='-BSK'
- if (l_bs_fxc) section_title='-BSK->Fxc'
+ call section('=','Main kernel loop')
  !
- NG=max(O_ng,BS_n_g_exch)
- if (BS_K_is_ALDA) NG=max(NG,BS_n_g_fxc)
+ if(BS_K_cutoff>0._SP) then
+   YAMBO_ALLOC(BS_max_val_CPU,(ncpu))
+   BS_max_val_CPU=0._SP
+ endif
+ !
+#if defined _PAR_IO
+ !
+ call io_control(ACTION=OP_APP,COM=REP,ID=ID_head)
+ io_BS_err=io_BS_header(iq,X,ID_head,"full")
+ !
+ call io_control(ACTION=OP_APP,COM=REP,ID=ID,DO_IT=.TRUE.)
+ io_BS_err=io_BS_PAR_init(iq,ID,"full")
+ !
+#else
+ call io_control(ACTION=OP_WR,COM=REP,SEC=(/1/),ID=ID)
+ io_BS_err=io_BS(iq,X,ID)
+#endif
+ !
+ ! This loop is on the exchange and correlation of the BSE
+ ! iHxc=1 --> K_H=v
+ ! iHxc=2 --> K_TDDFT=fxc
+ ! iHxc=3 --> K_c=W
+ !
+ l_matrix_init=.true.
+ l_load_kernel=.true.
+ l_write_kernel=.false.
+ !
+ BS_blk_done=.false.
+ !
+ do iHxc=1,3
+   !
+ if ((.not.l_bs_exch).and.iHxc==1) cycle
  !
- call WF_load(WF,NG,O_ng_shift,bands_to_load,(/1,Xk%nibz/),space='R',title=trim(section_title))
+ if ((.not.BS_K_is_ALDA).and.iHxc==2) cycle
  !
- ! Wave Functions Phases
- !=======================
- if(l_std_alg) call K_WF_phases(Xk)
- ! 
- ! Spatial Inversion Test
- !========================
- call WF_spatial_inversion(Ken,Xk)
+ if ((.not.l_bs_corr).and.iHxc==3) cycle
+ !
+ section_title='-BSK-'//trim(intc(iHxc))
+ if (l_bs_fxc) section_title='-BSK->Fxc-'//trim(intc(iHxc))
+ !
+ if (iHxc==1) NG=(/BS_n_g_exch,O_ng_exch/)
+ if (iHxc==2) NG=(/BS_n_g_fxc ,O_ng_tddft/)
+ if (iHxc==3) NG=(/O_ng       ,O_ng_shift/)
  !
  ! ALDA
  !======
- if (BS_K_is_ALDA) then
+ if (iHxc==2) then
+   !
+   call WF_load(WF,NG(1),NG(2),(/1,Ken%nbm/),(/1,Xk%nibz/),space='R',title="-TDDFT",keep_states_to_load=.true.)
+   !
+   QP_ng_Vxc=BS_n_g_fxc
    !
    if (l_tddft_rsum) then
      YAMBO_ALLOC(tddft_wf%rhotwr1,(fft_size*n_spinor*n_spinor))
@@ -280,13 +321,16 @@ subroutine K(iq,Ken,Xk,q,X,Xw,W_bss)
    else if (l_tddft_gsum) then
      YAMBO_ALLOC(tddft_wf%rhotwr_DP,(fft_size))
    endif
+   !
    YAMBO_ALLOC(tddft_wf%WF_symm1,(fft_size,n_spinor))
    YAMBO_ALLOC(tddft_wf%WF_symm2,(fft_size,n_spinor))
    !
    YAMBO_ALLOC(F_xc,(fft_size,n_spin,n_spin))
-   YAMBO_ALLOC(V_xc,(fft_size,n_spin))
-   V_xc=0._SP
-   QP_ng_Vxc=BS_n_g_fxc
+   if(l_BS_magnons .or. n_spinor==2 .or. l_Fxc_from_Vxc) then
+     YAMBO_ALLOC(V_xc,(fft_size,n_spin))
+     V_xc=0._SP
+   endif
+   !
    if(n_spin>1) then
      YAMBO_ALLOC(magn,(fft_size,3))
    endif
@@ -294,40 +338,18 @@ subroutine K(iq,Ken,Xk,q,X,Xw,W_bss)
    YAMBO_ALLOC(F_xc_mat,(fft_size,n_spin,n_spin,n_spin,n_spin))
    call Build_F_xc_mat(V_xc,F_xc,F_xc_mat)
    YAMBO_FREE(F_xc)
-   YAMBO_FREE(V_xc)
+   if(l_BS_magnons .or. n_spinor==2 .or. l_Fxc_from_Vxc) then
+     YAMBO_FREE(V_xc)
+   endif
    if(n_spin>1) then
      YAMBO_FREE(magn)
    endif
+   !
    call TDDFT_ALDA_lower_cutoff(FXC_mode)
    !
+   call WF_free(WF,keep_states_to_load=.true.)
+   !
  endif
- !
- ! DB identifier 
- !===============
- BS_identifier=pickup_a_random(10000._SP)
- !
- ! BS DB description section I/O
- !===============================
- !
- call section('=','Main kernel loop')
- !
- if(BS_K_cutoff>0._SP) then
-   YAMBO_ALLOC(BS_max_val_CPU,(ncpu))
-   BS_max_val_CPU=0._SP
- endif
- !
-#if defined _PAR_IO
- !
- call io_control(ACTION=OP_APP,COM=REP,ID=ID_head,DO_IT=.TRUE.)
- io_BS_err=io_BS_header(iq,X,ID_head,"full")
- !
- call io_control(ACTION=OP_APP,COM=REP,ID=ID,DO_IT=.TRUE.)
- io_BS_err=io_BS_PAR_init(iq,ID,"full")
- !
-#else
- call io_control(ACTION=OP_WR,COM=REP,SEC=(/1/),ID=ID)
- io_BS_err=io_BS(iq,X,ID)
-#endif
  !
  ! Timing
  !========
@@ -340,10 +362,8 @@ subroutine K(iq,Ken,Xk,q,X,Xw,W_bss)
  ! First load previously computed matrix elements.
  !=================================================
  !
- BS_blk_done=.false.
- !
 #ifdef _PAR_IO
- if (l_BSE_restart) then
+ if (l_BSE_restart.and.l_load_kernel) then
    !
    if (m_steps>0) call live_timing("Loading partial kernel",m_steps)
    !
@@ -380,16 +400,42 @@ subroutine K(iq,Ken,Xk,q,X,Xw,W_bss)
      !
    enddo
    !
-   call live_timing()
+   if (m_steps>0) call live_timing()
    !
    call msg("s","Kernel loaded percentual ",real(n_steps-m_steps,SP)/real(n_steps,SP)*100._SP,'[%]')
    !
+   l_load_kernel=.false.
+   !
  endif
 #endif
+ !
+ if ( (iHxc==1 .and. l_bs_exch_wf_in_loop) .or. (iHxc==2.and.l_bs_tddft_wf_in_loop.and.l_tddft_gsum)) then
+   ! DS this is used only for iq==1
+   call fft_setup(NG(1),NG(2),.true.)
+ else
+   !
+   call WF_load(WF,NG(1),NG(2),BS_bands,(/1,Xk%nibz/),space='R',&
+   &            title=trim(section_title),keep_states_to_load=.true.)
+   !
+   ! Wave Functions Phases
+   !=======================
+   if (iHxc==3.and.l_std_alg) then
+     call K_WF_phases(Xk)
+     if (l_skip_phases) then
+       WF_phase=-99._SP
+       call msg('r','[BSE] WF phases ignored as imposed in input')
+     endif
+   endif
+   !
+ endif
  !
  if (.not.l_bs_fxc) section_title='Kernel'
  if (     l_bs_fxc) section_title='Kernel->Fxc'
  !
+ if (iHxc==1) section_title=trim(section_title)//' exch'
+ if (iHxc==2) section_title=trim(section_title)//' tddft'
+ if (iHxc==3) section_title=trim(section_title)//' corr'
+ !
  if (m_steps>0) call live_timing(trim(section_title),m_steps)
  !
  !------------------
@@ -398,10 +444,17 @@ subroutine K(iq,Ken,Xk,q,X,Xw,W_bss)
  !
  block_loop:&
  do i_block=1,n_BS_blks
+   !
+   mode_now=BS_blk(i_block)%mode
+   if (.not.l_BS_ares_from_res) mode_now="F"
+   !
+   if (iHxc==1) l_write_kernel=.not.(BS_K_is_ALDA.or.l_bs_corr)
+   if (iHxc==2) l_write_kernel=.not. l_bs_corr
+   if (iHxc==3) l_write_kernel=.true.
    !
    if (BS_blk_done(i_block)) then
 #if defined _PAR_IO
-     if(l_partial_kernel_loaded) then
+     if(l_partial_kernel_loaded.and.l_write_kernel) then
        call io_control(ACTION=WR,ID=ID)
        call io_BS_PAR_block(iq,i_block,ID,"full")
      endif
@@ -414,6 +467,13 @@ subroutine K(iq,Ken,Xk,q,X,Xw,W_bss)
    if (BS_LiveTiming_steps>= 0) i_steps=BS_Block_size(i_block)
    if (BS_LiveTiming_steps==-1) i_steps=1
    !
+   ! Nothing to do, just write the kernel to disk
+   if (trim(BS_blk(i_block)%mode)=="C" .and. iHxc==3 .and. .not.BS_cpl_K_corr) then
+     BS_blk(i_block)%done(:,:)="t"
+     if(BS_K_cutoff>0._SP) BS_max_val_CPU(myid+1)=maxval( (/BS_max_val_CPU(myid+1),abs(BS_blk(i_block)%mat(:,:))/) )
+     goto 100
+   endif
+   !
    i_Tgrp_k   =BS_blk(i_block)%iT_k
    i_Tgrp_p   =BS_blk(i_block)%iT_p
    i_Tgrp_k_st=BS_blk(i_block)%iT_k_st
@@ -421,33 +481,40 @@ subroutine K(iq,Ken,Xk,q,X,Xw,W_bss)
    i_k        =BS_blk(i_block)%ik
    i_p        =BS_blk(i_block)%ip
    !
-   mode_now=BS_blk(i_block)%mode
-   if (.not.l_BS_ares_from_res) mode_now="F"
-   !
    ! Exchange oscillators
    !----------------------
-   ! 
-   call K_exchange_collisions(iq,Xk,i_Tgrp_k)
-   !
-   if (i_Tgrp_k/=i_Tgrp_p) call K_exchange_collisions(iq,Xk,i_Tgrp_p)
+   if(iHxc==1) then
+     if (.not.allocated( BS_T_grp(i_Tgrp_k)%O_x) ) then
+       call BS_exchange_oscillators_alloc(i_Tgrp_k)
+       call K_exchange_collisions(iq,Xk,i_Tgrp_k,NG,l_bs_exch_wf_in_loop)
+     endif
+     if (.not.allocated( BS_T_grp(i_Tgrp_p)%O_x) ) then
+       call BS_exchange_oscillators_alloc(i_Tgrp_p)
+       call K_exchange_collisions(iq,Xk,i_Tgrp_p,NG,l_bs_exch_wf_in_loop)
+     endif
+   endif
    !
    ! TDDFT oscillators
    !----------------------
-   if(BS_K_is_ALDA.and.l_tddft_gsum) then
+   if(iHxc==2 .and. l_tddft_gsum) then
      if (.not.allocated( BS_T_grp(i_Tgrp_k)%O_tddft_L) ) then
        call TDDFT_oscillators_alloc_L(i_Tgrp_k)
-       call TDDFT_ALDA_eh_space_G_collisions_L(iq,Xk,i_Tgrp_k,tddft_wf,mode_now)
+       call TDDFT_ALDA_eh_space_G_collisions_L(iq,Xk,i_Tgrp_k,NG,l_bs_tddft_wf_in_loop,tddft_wf,mode_now)
      endif
      if (.not.allocated( BS_T_grp(i_Tgrp_p)%O_tddft_R) ) then
        call TDDFT_oscillators_alloc_R(i_Tgrp_p)
-       call TDDFT_ALDA_eh_space_G_collisions_R(iq,Xk,i_Tgrp_p,tddft_wf,mode_now)
+       call TDDFT_ALDA_eh_space_G_collisions_R(iq,Xk,i_Tgrp_p,NG,l_bs_tddft_wf_in_loop,tddft_wf,mode_now)
      endif
    endif
    !
    ! Correlation oscillators
    !----------------------
-   if (l_std_alg) call K_correlation_collisions_std(iq,i_block,qindx_ID_frag,Xk,q)
-   if (l_dir_alg) call K_correlation_collisions_dir(iq,i_block,qindx_ID_frag,Xk,q)
+   if(iHxc==3) then
+     if (l_std_alg) call K_correlation_collisions_std(iq,i_block,qindx_ID_frag,Xk,q)
+     if (l_dir_alg) call K_correlation_collisions_dir(iq,i_block,qindx_ID_frag,Xk,q)
+   endif
+   !
+   call timing('X T_space Kernel sum',OPR='start')
    !
    ! Transition Loops
    !------------------
@@ -458,11 +525,11 @@ subroutine K(iq,Ken,Xk,q,X,Xw,W_bss)
    !$omp &       sqrt_f_itk,i_k_bz,i_v_k,i_c_k,i_k_sp_pol_c,i_k_sp_pol_v,i_k_s,is_k,os_k,i_k_s_m1,H_pos, &
    !$omp &       sqrt_f_itp,i_p_bz,i_v_p,i_c_p,i_p_sp_pol_c,i_p_sp_pol_v,i_p_s,is_p,os_p,i_kp_s, &
    !$omp &       i_kmq_bz,ig_kmq,i_kmq,i_kmq_s,i_kmq_t,i_kmq_s_m1,i_kp_mq_s, &
-   !$omp &       i_pmq_bz,ig_pmq,i_pmq,i_pmq_s,i_pmq_t, &
+   !$omp &       i_pmq_bz,ig_pmq,i_pmq,i_pmq_s,i_pmq_t,ig_W_final, &
    !$omp &       ig_W,iq_W,iq_W_s,iq_W_bz,iq_W_bz_mq,ig_W_mq,iq_W_mq,iq_W_s_mq,H_x,H_c, &
    !$omp &       i_p_bz_last,i_k_bz_last,i_pmq_bz_last,i_kmq_bz_last, &
    !$omp &       iq_W_bz_last,ig_W_last,iq_W_bz_mq_last,ig_W_mq_last, &
-   !$omp &       i_k_bz_mem,i_kmq_bz_mem,qindx_tmp,tddft_wf )
+   !$omp &       i_k_bz_mem,i_kmq_bz_mem,qindx_tmp,BS_mat_tmp,tddft_wf )
 #endif
    !
    call OPENMP_update(master_thread)   
@@ -483,8 +550,9 @@ subroutine K(iq,Ken,Xk,q,X,Xw,W_bss)
        H_pos(1) = sum(BS_T_grp(I_Tgrp_k_st:I_Tgrp_k-1)%size)+i_Tk+& 
        &              (BS_T_grp(I_Tgrp_k)%i_res_ares-1)*BS_K_dim(1) 
        H_pos(2) = sum(BS_T_grp(I_Tgrp_p_st:I_Tgrp_p-1)%size)+i_Tp+& 
-       &              (BS_T_grp(I_Tgrp_p)%i_res_ares-1)*BS_K_dim(1) 
-       if (H_pos(1)>H_pos(2)) cycle 
+       &              (BS_T_grp(I_Tgrp_p)%i_res_ares-1)*BS_K_dim(1)
+       !
+       if (H_pos(1)>H_pos(2)) cycle
        !
        i_k_bz  = BS_T_grp(i_Tgrp_k)%table(i_Tk,1) 
        i_p_bz  = BS_T_grp(i_Tgrp_p)%table(i_Tp,1) 
@@ -498,16 +566,13 @@ subroutine K(iq,Ken,Xk,q,X,Xw,W_bss)
        i_kmq_bz=qindx_X(iq,i_k_bz,1) 
        i_pmq_bz=qindx_X(iq,i_p_bz,1) 
        !
-       ig_kmq    = qindx_X(iq,i_k_bz,2)
-       ig_pmq    = qindx_X(iq,i_p_bz,2)
-       !
        i_kmq   =Xk%sstar(i_kmq_bz,1) 
        i_pmq   =Xk%sstar(i_pmq_bz,1) 
        !
        i_kmq_s =Xk%sstar(i_kmq_bz,2) 
        i_pmq_s =Xk%sstar(i_pmq_bz,2) 
        !
-       if (BS_res_K_corr) then 
+       if (iHxc==3) then 
          i_kmq_t=BS_blk(i_block)%kp_table(1,i_kmq) 
          i_pmq_t=BS_blk(i_block)%kp_table(2,i_pmq) 
        endif 
@@ -515,7 +580,7 @@ subroutine K(iq,Ken,Xk,q,X,Xw,W_bss)
        i_kmq_s_m1 = sop_inv(i_kmq_s) 
        i_kp_mq_s  = sop_tab(i_kmq_s_m1,i_pmq_s) 
        !
-       if ((BS_res_K_corr.or.BS_cpl_K_corr)) then 
+       if (iHxc==3) then 
          i_k_bz_mem=PAR_K_scheme%bz_index(i_k_bz) 
          if (i_p_bz_last/=i_p_bz.or.i_k_bz_last/=i_k_bz) then 
            i_p_bz_last=i_p_bz 
@@ -551,7 +616,10 @@ subroutine K(iq,Ken,Xk,q,X,Xw,W_bss)
        i_k_sp_pol_v=BS_T_grp(i_Tgrp_k)%table(i_Tk,5) 
        i_p_sp_pol_v=BS_T_grp(i_Tgrp_p)%table(i_Tp,5) 
        !
-       if ((BS_res_K_corr.or.BS_cpl_K_corr)) then 
+       ig_kmq    = qindx_X(iq,i_k_bz,2) 
+       ig_pmq    = qindx_X(iq,i_p_bz,2) 
+       !        
+       if (iHxc==3) then 
          i_kmq_bz_mem=PAR_K_scheme%bz_index(i_kmq_bz) 
          if (i_pmq_bz_last/=i_pmq_bz.or.i_kmq_bz_last/=i_kmq_bz) then 
            i_pmq_bz_last=i_pmq_bz 
@@ -581,14 +649,14 @@ subroutine K(iq,Ken,Xk,q,X,Xw,W_bss)
          !
        endif 
        ! 
-       if (BS_K_is_ALDA.and.l_tddft_rsum) then 
+       if (iHxc==2.and.l_tddft_rsum) then 
          is_k = (/i_c_k,i_k,i_k_s,i_k_sp_pol_c/) 
          os_k = (/i_v_k,i_kmq,i_kmq_s,i_k_sp_pol_v/) 
          is_p = (/i_c_p,i_p,i_p_s,i_p_sp_pol_c/) 
          os_p = (/i_v_p,i_pmq,i_pmq_s,i_p_sp_pol_v/) 
        endif 
        !
-       if (BS_res_K_corr.or.BS_cpl_K_corr) then 
+       if (iHxc==3) then 
          !
          if (iq_W_bz/=iq_W_bz_mq.or.iq_W/=iq_W_mq.or.iq_W_s/=iq_W_s_mq) call error("Wrong transferred momentum")
          !
@@ -609,18 +677,18 @@ subroutine K(iq,Ken,Xk,q,X,Xw,W_bss)
          !===================================
          !
          ! Exchange
-         if (BS_res_K_exchange) H_x=K_exchange_kernel_resonant(iq, BS_n_g_exch,               &
+         if (iHxc==1) H_x=K_exchange_kernel_resonant(iq, BS_n_g_exch,               &
 &                                   BS_T_grp(i_Tgrp_p),i_Tp, BS_T_grp(i_Tgrp_k),i_Tk)
          !
          ! ALDA
-         if (BS_K_is_ALDA) then
+         if (iHxc==2) then
            if(l_tddft_rsum)     H_x=H_x+TDDFT_ALDA_eh_space_R_kernel(is_k,os_k,is_p,os_p,tddft_wf,'RES')
            if(l_tddft_gsum)     H_x=H_x+TDDFT_ALDA_eh_space_G_kernel(BS_n_g_fxc,&
 &                                   BS_T_grp(i_Tgrp_p),i_Tp, BS_T_grp(i_Tgrp_k),i_Tk)
          endif
          !
          ! Correlations
-         if (BS_res_K_corr) then
+         if (iHxc==3) then
            if (l_std_alg)       H_c=K_correlation_kernel_std(i_block,i_p,i_pmq, &
 &                                   i_k_s,i_kp_s,i_c_k,i_c_p,i_kmq_s,i_kp_mq_s,i_v_k,i_v_p,     &
 &                                   i_kmq_t,i_pmq_t,i_k_sp_pol_c,i_p_sp_pol_c,i_k_sp_pol_v,i_p_sp_pol_v,  &
@@ -637,18 +705,18 @@ subroutine K(iq,Ken,Xk,q,X,Xw,W_bss)
          !=============================
          !
          ! Exchange
-         if (BS_cpl_K_exchange) H_x=K_exchange_kernel_coupling(iq, BS_n_g_exch,              &
-                                   &    BS_T_grp(i_Tgrp_p),i_Tp, BS_T_grp(i_Tgrp_k),i_Tk) 
+         if (iHxc==1) H_x=K_exchange_kernel_coupling(iq, BS_n_g_exch,              &
+&                                 BS_T_grp(i_Tgrp_p),i_Tp, BS_T_grp(i_Tgrp_k),i_Tk) 
          !
          ! ALDA
-         if (BS_K_is_ALDA) then
+         if (iHxc==2) then
            if(l_tddft_rsum)     H_x=H_x+TDDFT_ALDA_eh_space_R_kernel(is_k,os_k,is_p,os_p,tddft_wf,'CPL')
            if(l_tddft_gsum)     H_x=H_x+TDDFT_ALDA_eh_space_G_kernel(BS_n_g_fxc,&
 &                                 BS_T_grp(i_Tgrp_p),i_Tp, BS_T_grp(i_Tgrp_k),i_Tk)
          endif
          !
          ! Correlations
-         if (BS_cpl_K_corr) then
+         if (iHxc==3) then
            if (l_std_alg)       H_c=K_correlation_kernel_std(i_block,i_p,i_pmq, &
 &                                 i_k_s,i_kp_s,i_c_k,i_v_p,i_kmq_s,i_kp_mq_s,i_v_k,i_c_p,    &
 &                                 i_kmq_t,i_pmq_t,i_k_sp_pol_c,i_p_sp_pol_v,i_k_sp_pol_v,i_p_sp_pol_c,  &
@@ -661,15 +729,18 @@ subroutine K(iq,Ken,Xk,q,X,Xw,W_bss)
          !
        end select
        !
-       if (l_tddft.and.WF_exx_fraction>0._SP) H_c=H_c*WF_exx_fraction
+       if (l_tddft.and.WF_exx_fraction>0._SP.and.iHxc==3) H_c=H_c*WF_exx_fraction
        !
        sqrt_f_itk=sqrt(cmplx(BS_T_grp(i_Tgrp_k)%f(i_Tk),kind=SP))
        sqrt_f_itp=sqrt(cmplx(BS_T_grp(i_Tgrp_p)%f(i_Tp),kind=SP))
        !
-       BS_blk(i_block)%mat(i_Tk,i_Tp)=sqrt_f_itk*(H_x*Co-H_c)*sqrt_f_itp
-       if (mode_now=="C") BS_blk(i_block)%mat(i_Tk,i_Tp)= cI*BS_blk(i_block)%mat(i_Tk,i_Tp)
+       BS_mat_tmp=sqrt_f_itk*(H_x*Co-H_c)*sqrt_f_itp
+       if (mode_now=="C") BS_mat_tmp= cI*BS_mat_tmp
        !
-       BS_blk(i_block)%done(i_Tk,i_Tp)="t"
+       if (     l_matrix_init) BS_blk(i_block)%mat(i_Tk,i_Tp) = BS_mat_tmp
+       if (.not.l_matrix_init) BS_blk(i_block)%mat(i_Tk,i_Tp) = BS_blk(i_block)%mat(i_Tk,i_Tp)+BS_mat_tmp
+       !
+       if (l_write_kernel) BS_blk(i_block)%done(i_Tk,i_Tp)="t"
        !
        i_v_k=BS_T_grp(i_Tgrp_k)%table(i_Tk,2) 
        i_c_k=BS_T_grp(i_Tgrp_k)%table(i_Tk,3) 
@@ -699,64 +770,45 @@ subroutine K(iq,Ken,Xk,q,X,Xw,W_bss)
    !$omp end parallel
 #endif
    !
-   if(BS_K_cutoff>0._SP) BS_max_val_CPU(myid+1)=maxval( (/BS_max_val_CPU(myid+1),abs(BS_blk(i_block)%mat(:,:))/) )
+   call timing('X T_space Kernel sum',OPR='stop')
    !
-   call BS_oscillators_free(0,i_block)
+   if(BS_K_cutoff>0._SP.and.l_write_kernel) &
+&     BS_max_val_CPU(myid+1)=maxval( (/BS_max_val_CPU(myid+1),abs(BS_blk(i_block)%mat(:,:))/) )
    !
-   if(l_BSE_minimize_memory) then
-     call BS_oscillators_free(i_Tgrp_p,0)
-     call BS_oscillators_free(i_Tgrp_k,0)
-     if(l_tddft_gsum) then
-       call TDDFT_oscillators_free_L(i_Tgrp_k)
-       call TDDFT_oscillators_free_R(i_Tgrp_p)
-     endif
+   if(l_BSE_minimize_memory.and.iHxc==1) then
+     call BS_exchange_oscillators_free(i_Tgrp_k)
+     call BS_exchange_oscillators_free(i_Tgrp_p)
    endif
    !
+   if(l_BSE_minimize_memory.and.iHxc==2.and.l_tddft_gsum) then
+     call TDDFT_oscillators_free_L(i_Tgrp_k)
+     call TDDFT_oscillators_free_R(i_Tgrp_p)
+   endif
+   !
+   if(iHxc==3) call BS_correlation_oscillators_free(i_block,l_std_alg)
+   !
+100 if(l_write_kernel) then
 #if defined _PAR_IO
-   call io_control(ACTION=WR,ID=ID)
-   call io_BS_PAR_block(iq,i_block,ID,"full")
+     call io_control(ACTION=WR,ID=ID)
+     call io_BS_PAR_block(iq,i_block,ID,"full")
 #else
-   call io_control(ACTION=WR,COM=REP,SEC=(/i_block+1/),ID=ID)
-   io_BS_err=io_BS(iq,X,ID)
+     call io_control(ACTION=WR,COM=REP,SEC=(/i_block+1/),ID=ID)
+     io_BS_err=io_BS(iq,X,ID)
 #endif
+   endif
    !
    call live_timing(steps=i_steps)
    !
  enddo block_loop
  !
-#if defined _PAR_IO
- if(BS_K_cutoff>0._SP) then
-   call PP_redux_wait(BS_max_val_CPU)
-   BS_max_val=maxval(BS_max_val_CPU)
-   call K_compress(iq,ID_compr)
-   call K_compressed_IO(iq,io_BS_err,"write")
- endif
- !
- call io_control(ACTION=WR_CL,COM=REP,ID=ID_head)
- call io_control(ACTION=WR_CL,COM=REP,ID=ID)
- call io_BS_PAR_free(ID_head,ID,"full",.true.)
-#else
- call io_control(ACTION=WR_CL,COM=REP,SEC=(/n_BS_blks+2/),ID=ID)
- io_BS_err=io_BS(iq,X,ID)
-#endif
- !
- if (n_steps>0) call live_timing( )
+ if (m_steps>0) call live_timing( )
  !
  ! GLOBAL CLEANING 
  !=================
  !
- call BS_oscillators_free(BS_nT_grps,0)
+ if(iHxc==1) call BS_exchange_oscillators_free(BS_nT_grps)
  !
- if ( (BS_res_K_corr.or.BS_cpl_K_corr) .and. l_std_alg) then
-   YAMBO_FREE(WF_phase)
- endif
- !
- if (l_bs_fxc) then
-   !
-   YAMBO_FREE(F_xc_gspace)
-   YAMBO_FREE(FXC_K_diagonal)
-   !
- else if (BS_K_is_ALDA) then
+ if(iHxc==2) then
    !
    if (l_tddft_rsum) then
      YAMBO_FREE(tddft_wf%rhotwr1)
@@ -769,16 +821,50 @@ subroutine K(iq,Ken,Xk,q,X,Xw,W_bss)
    YAMBO_FREE(tddft_wf%WF_symm1)
    YAMBO_FREE(tddft_wf%WF_symm2)
    !
-   YAMBO_FREE(F_xc_mat)
+   if (l_bs_fxc) then
+     YAMBO_FREE(F_xc_gspace)
+     YAMBO_FREE(FXC_K_diagonal)
+   else if (BS_K_is_ALDA) then
+     YAMBO_FREE(F_xc_mat)
+   endif
    !
  endif
  !
- if (any((/BS_res_K_corr,BS_cpl_K_corr/)).and..not.l_bs_fxc) qindx_tmp=qindx_B_close(qindx_ID,qindx_ID_frag)
+ if(iHxc==3) then
+   if (l_std_alg) then
+     YAMBO_FREE(WF_phase)
+   endif
+   qindx_tmp=qindx_B_close(qindx_ID,qindx_ID_frag)
+   YAMBO_FREE(BS_W)
+   YAMBO_FREE(RIM_W)
+ endif
+ !
+ if ( (iHxc==1 .and. l_bs_exch_wf_in_loop) .or. (iHxc==2.and.l_bs_tddft_wf_in_loop.and.l_tddft_gsum)) then
+   call fft_free()
+ else
+   call WF_free(WF,keep_states_to_load=l_write_kernel)
+ endif
+ !
+ l_matrix_init=.false.
+ !
+ enddo ! iHxc loop
+ !
+#if defined _PAR_IO
+ if(BS_K_cutoff>0._SP) then
+   call PP_redux_wait(BS_max_val_CPU)
+   BS_max_val=maxval(BS_max_val_CPU)
+   call K_compress(iq,ID_compr)
+   call K_compressed_IO(iq,io_BS_err,"write")
+ endif
  !
- YAMBO_FREE(BS_W)
- YAMBO_FREE(RIM_W)
- call WF_free(WF)
+ call io_control(ACTION=WR_CL,COM=REP,ID=ID_head)
+ call io_control(ACTION=WR_CL,COM=REP,ID=ID)
+ call io_BS_PAR_free(ID_head,ID,"full",.true.)
+#else
+ call io_control(ACTION=WR_CL,COM=REP,SEC=(/n_BS_blks+2/),ID=ID)
+ io_BS_err=io_BS(iq,X,ID)
+#endif
  !
  call timing('X T_space Kernel',OPR='stop')
  !
-end subroutine K
+end subroutine K_kernel
diff --git a/src/bse/K_observables.F b/src/bse/K_observables.F
index e46ace829f..e5ad52a0ff 100644
--- a/src/bse/K_observables.F
+++ b/src/bse/K_observables.F
@@ -16,7 +16,7 @@ subroutine K_observables( W )
  use BS_solvers,   ONLY:run_Haydock,run_inversion,run_Diago,BSS_mode
  use BS,           ONLY:BS_dip_size,l_BS_anomalous_Hall,l_BS_abs,l_BS_jdos,l_BS_trace,BSE_prop,&
 &                       l_BS_esort,l_BS_kerr,l_BS_magnons,l_BS_dichroism,l_BS_anomalous_Hall,l_BS_photolum,&
-&                       l_BS_optics,BS_dip_size,l_BS_magnons,l_BS_photolum
+&                       l_BS_optics,BS_dip_size,l_BS_magnons,l_BS_photolum,l_BS_kerr_asymm,l_BS_esort_indx
  use parser_m,     ONLY:parser
  use stderr,       ONLY:STRING_match
  use electrons,    ONLY:levels,n_sp_pol,n_spin
@@ -37,10 +37,13 @@ subroutine K_observables( W )
  l_BS_kerr      = STRING_match(BSE_prop,'kerr')
  l_BS_photolum  = STRING_match(BSE_prop,'photolum')
  l_BS_magnons   = STRING_match(BSE_prop,'magn') .and.n_spin>1
+ l_BS_kerr_asymm= STRING_match(BSE_prop,'asymm') .and. l_BS_kerr
+ l_BS_anomalous_Hall = STRING_match(BSE_prop,'anHall') .and. l_BS_kerr
  !
  ! How ...
  !---------
  l_BS_esort     = STRING_match(BSE_prop,'esrt')
+ l_BS_esort_indx= STRING_match(BSE_prop,'indx').and.l_BS_esort
  !
  ! How to treat dipoles...
  !------------------------
@@ -55,16 +58,44 @@ subroutine K_observables( W )
    call warning(" n_sp_pol=2 and magnons requested. Spin flip BSE only is computed")
  endif
  if (l_BS_kerr) then
-   call parser('AnHall',l_BS_anomalous_Hall)
+   if(.not.l_BS_anomalous_Hall) call parser('AnHall',l_BS_anomalous_Hall)
    BS_dip_size=2
    l_BS_abs=.TRUE.
  endif
  if (l_BS_photolum) global_gauge="velocity"
+ if ((l_BS_photolum.or.l_BS_kerr).and.l_BS_trace) then
+   call warning(" Trace not compatible with kerr or pl. Switching it off")
+   l_BS_trace  = .false.
+ endif
+ if (l_BS_dichroism.and..not.l_BS_trace) then
+   BS_dip_size=2
+   call warning(" Natural dichroism without trace")
+ !  NB: in the case without trace the quadripolar terms should be taken into account,
+ !  With trace these terms average to zero
+ !  call warning(" Trace is automatically switched on with dichroism")
+ !  l_BS_trace=.true.
+ endif
+ !
  if (l_BS_trace   ) BS_dip_size = 3
  if (l_BS_trace .and. .not.l_rpa_IP) call error(" trace coded only at the IP level")
  !
  l_BS_optics=l_BS_abs.or.l_BS_kerr.or.l_BS_dichroism.or.l_BS_photolum
  !
+ ! Now fix BSE_prop string
+ !
+ BSE_prop=""
+ if(l_BS_abs)            BSE_prop=trim(BSE_prop)//' abs'
+ if(l_BS_jdos)           BSE_prop=trim(BSE_prop)//' jdos'
+ if(l_BS_dichroism)      BSE_prop=trim(BSE_prop)//' dich'
+ if(l_BS_kerr)           BSE_prop=trim(BSE_prop)//' kerr'
+ if(l_BS_kerr_asymm)     BSE_prop=trim(BSE_prop)//' asymm'
+ if(l_BS_anomalous_Hall) BSE_prop=trim(BSE_prop)//' anHall'
+ if(l_BS_photolum)       BSE_prop=trim(BSE_prop)//' photolum'
+ if(l_BS_magnons)        BSE_prop=trim(BSE_prop)//' magn'
+ if(l_BS_esort)          BSE_prop=trim(BSE_prop)//' esrt'
+ if(l_BS_esort)          BSE_prop=trim(BSE_prop)//' indx'
+ if(l_BS_trace)          BSE_prop=trim(BSE_prop)//' trace'
+ !
  ! Solver Logicals
  !=================
  run_Diago     = STRING_match(BSS_mode,'d')
diff --git a/src/bse/K_solvers.F b/src/bse/K_solvers.F
index 7e6ac48041..9408d1313f 100644
--- a/src/bse/K_solvers.F
+++ b/src/bse/K_solvers.F
@@ -15,7 +15,7 @@ subroutine K_solvers(iq,Ken,Xk,q,X_static,W_bss)
  use BS_solvers,     ONLY:BSS_desc,BSS_free,BS_mat,run_Haydock,&
 &                         run_inversion,run_Diago,run_Slepc
  use BS,             ONLY:BS_K_has_been_calculated_loaded,&
-&                         BS_blks_free,l_BS_abs,l_BS_kerr,l_BS_magnons
+ &                       BS_blks_free,l_BS_abs,l_BS_kerr,l_BS_magnons,l_BS_dichroism
  use parallel_m,     ONLY:master_cpu,CPU_structure
  use IO_int,         ONLY:IO_and_Messaging_switch
  use linear_algebra, ONLY:INV,DIAGO
@@ -95,12 +95,14 @@ subroutine K_solvers(iq,Ken,Xk,q,X_static,W_bss)
    ! The jdos (l_BS_jdos) cannot be computed easely via the Haydock solver.
    ! it could be obtained as the average of N calls to K_Haydock
    ! with random intial vectors. See https://doi.org/10.1002/nla.2170
+   !
    ! A possible alternative could be to use the algorithm with Cf
    ! from both the left and the right hand side to get from a single Haydock loop
    !
    if (l_BS_abs)     call K_Haydock(iq,W_bss,"optics",Xk)
    if (l_BS_kerr)    call K_Haydock(iq,W_bss,"kerr",Xk)
    if (l_BS_magnons) call K_Haydock(iq,W_bss,"magnons",Xk)
+   if (l_BS_dichroism) call K_Haydock(iq,W_bss,"dichroism",Xk)
    !
  endif
  !
diff --git a/src/bz_ops/bz_samp_indexes.F b/src/bz_ops/bz_samp_indexes.F
index 30c24261f4..2e40b7695f 100644
--- a/src/bz_ops/bz_samp_indexes.F
+++ b/src/bz_ops/bz_samp_indexes.F
@@ -94,7 +94,7 @@ subroutine bz_samp_indexes(en,k,Xk,q)
  !
  if(l_nl_optics) then
    X_scattering    =.FALSE.
-   sigma_scattering=.FALSE.
+ !   sigma_scattering=.FALSE. ! I need them for the LSEX
  endif
  !
 #endif
diff --git a/src/collisions/.objects b/src/collisions/.objects
index 2d8cc5b306..22ead9636e 100644
--- a/src/collisions/.objects
+++ b/src/collisions/.objects
@@ -9,6 +9,8 @@ RT_objs = COLLISIONS_compose_rt.o
 #endif
 #if defined _NL
 NL_objs = COLLISIONS_compose_nl.o COLLISIONS_compress.o
+LSEX_objs = Build_LSEX_collisions.o LSEX_potential.o OSCLL_eval.o OSCLL_load.o \
+            OSCLL_compose_collision.o OSCLL_compose_nl.o OSCLL_compose_vbands.o 
 #endif
 #if defined _QED || defined _SC || defined _RT || defined _NL 
 objs= PLASMA_parallel_setup.o COLLISIONS_basic_operations.o \
@@ -16,5 +18,5 @@ objs= PLASMA_parallel_setup.o COLLISIONS_basic_operations.o \
       COLLISIONS_alloc_and_free.o COLLISIONS_eval.o COLLISIONS_load.o \
       COLLISIONS_linearize_and_IO.o COLLISIONS_map_to_QP_table.o \
       COLLISIONS_NEQ_GW_static.o COLLISIONS_HXC.o SCATTERING_GW_kinematics.o \
-      $(SC_objs) $(QED_objs) $(RT_objs) $(NL_objs)
+      $(SC_objs) $(QED_objs) $(RT_objs) $(NL_objs) $(LSEX_objs)
 #endif
diff --git a/src/collisions/Build_LSEX_collisions.F b/src/collisions/Build_LSEX_collisions.F
new file mode 100644
index 0000000000..4b77cebc17
--- /dev/null
+++ b/src/collisions/Build_LSEX_collisions.F
@@ -0,0 +1,82 @@
+!
+!        Copyright (C) 2000-2022 the LUMEN team
+!              http://www.yambo-code.org
+!
+! Authors (see AUTHORS file for details): CA
+! 
+! This file is distributed under the terms of the GNU 
+! General Public License. You can redistribute it and/or 
+! modify it under the terms of the GNU General Public 
+! License as published by the Free Software Foundation; 
+! either version 2, or (at your option) any later version.
+!
+! This program is distributed in the hope that it will 
+! be useful, but WITHOUT ANY WARRANTY; without even the 
+! implied warranty of MERCHANTABILITY or FITNESS FOR A 
+! PARTICULAR PURPOSE.  See the GNU General Public License 
+! for more details.
+!
+! You should have received a copy of the GNU General Public 
+! License along with this program; if not, write to the Free 
+! Software Foundation, Inc., 59 Temple Place - Suite 330,Boston, 
+! MA 02111-1307, USA or visit http://www.gnu.org/copyleft/gpl.txt.
+!
+subroutine Build_LSEX_collisions(k,q,i_qp,COLL) 
+ !
+ ! This subroutine calculate the long-range part of the Screened exchange
+ !
+ use electrons,      ONLY:n_sp_pol
+ use pars,           ONLY:SP,cI,cZERO
+ use collision_ext,  ONLY:OSCLL,W,ng_oscll,COLL_bands
+ use QP_m,           ONLY:QP_table,QP_nk,QP_n_states
+ use R_lattice,      ONLY:bz_samp,qindx_S
+ ! 
+#include<memory.h>
+ !
+ type(bz_samp), intent(in)  :: k,q
+ integer,       intent(in)  :: i_qp
+ !
+ ! Work Space
+ !
+ integer :: i_mp  ! self-energy external indexes
+ integer :: i_np,iqbz,i_kmq ! self-energy internal indexes
+ integer :: ig1,ig2
+ integer :: i_n,i_m
+ integer :: i_k
+ !
+ ! Collisions output
+ !
+ complex(SP), intent(out) :: COLL(COLL_bands(1):COLL_bands(2),COLL_bands(1):COLL_bands(2),QP_nk)
+ !
+ ! Coll indexes
+ !
+ i_n=QP_table(i_qp,1)
+ i_m=QP_table(i_qp,2)
+ i_k=QP_table(i_qp,3)
+ !
+ ! Build Collisions from the Oscillators
+ !
+ ! COLL(n,m,n',m',k,k-q) = \sum_{G,G'} \rho(n,n',k,q,G')*\rho^*(m,m',k,q,G)*W(q,G',G)
+ !
+ do i_mp=COLL_bands(1),COLL_bands(2)
+    do i_np=COLL_bands(1),COLL_bands(2)
+      !
+      COLL(i_np,i_mp,:)=cZERO
+      ! 
+      do iqbz=1,q%nbz
+         i_kmq   = k%sstar(qindx_S(i_k,iqbz,1),1)
+         !
+         ! Build Collision on the fly
+         !
+         do ig1=1,ng_oscll
+           do ig2=1,ng_oscll
+             COLL(i_np,i_mp,i_kmq)=COLL(i_np,i_mp,i_kmq)+OSCLL(i_n,i_np,i_k,iqbz,ig2) &
+&                                                 *conjg(OSCLL(i_m,i_mp,i_k,iqbz,ig1))*W(iqbz,ig2,ig1) 
+            enddo
+         enddo
+         !
+       enddo
+   enddo
+ enddo
+ !
+end subroutine Build_LSEX_collisions
diff --git a/src/collisions/COLLISIONS_load.F b/src/collisions/COLLISIONS_load.F
index c681b7db0f..0d039f513e 100644
--- a/src/collisions/COLLISIONS_load.F
+++ b/src/collisions/COLLISIONS_load.F
@@ -124,8 +124,8 @@ subroutine COLLISIONS_load(CHECK_EXISTENCE_ONLY)
    !
    if (l_use_COH_collisions.and.l_sc_sex.and..not.l_use_Hxc_collisions) l_error=.TRUE.
    !
-   if (l_error)   call error(  ' HXC collisions not found / not compatible')
-   if (l_warning) call warning(' HXC collisions not found. Potential is computed on the fly.')
+   if (l_error)   call error(  ' HXC collisions not found/not compatible')
+   if (l_warning) call warning(' HXC collisions not found/not compatible. Potential is computed on the fly.')
    !
    l_use_scatt_collisions=all(io_err(3:4)==0).and.(l_elphoton_scatt.or.l_elphoton_corr.or.l_elel_scatt)
    if (io_err(3)/=0)                 call error(' el-el scatt collisions not found / not compatible') 
diff --git a/src/collisions/DOUBLE_project.dep b/src/collisions/DOUBLE_project.dep
index bcfe075e27..342b1d24b0 100644
--- a/src/collisions/DOUBLE_project.dep
+++ b/src/collisions/DOUBLE_project.dep
@@ -1,3 +1,4 @@
+ Build_LSEX_collisions.o
  COLLISIONS_HXC.o
  COLLISIONS_NEQ_GW_static.o
  COLLISIONS_alloc_and_free.o
@@ -5,11 +6,18 @@
  COLLISIONS_compose_nl.o
  COLLISIONS_compose_rt.o
  COLLISIONS_compose_sc.o
+ COLLISIONS_compress.o
  COLLISIONS_eval.o
  COLLISIONS_linearize_and_IO.o
  COLLISIONS_load.o
  COLLISIONS_map_to_QP_table.o
  COLLISIONS_momentum.o
+ LSEX_potential.o
+ OSCLL_compose_collision.o
+ OSCLL_compose_nl.o
+ OSCLL_compose_vbands.o
+ OSCLL_eval.o
+ OSCLL_load.o
  PLASMA_build_up.o
  PLASMA_parallel_setup.o
  PLASMA_tables_and_dimensions.o
diff --git a/src/collisions/LSEX_potential.F b/src/collisions/LSEX_potential.F
new file mode 100644
index 0000000000..2b8e5c8cc3
--- /dev/null
+++ b/src/collisions/LSEX_potential.F
@@ -0,0 +1,138 @@
+!
+!        Copyright (C) 2000-2022 the YAMBO team
+!              http://www.yambo-code.org
+!
+! Authors (see AUTHORS file for details): CA
+! 
+! This file is distributed under the terms of the GNU 
+! General Public License. You can redistribute it and/or 
+! modify it under the terms of the GNU General Public 
+! License as published by the Free Software Foundation; 
+! either version 2, or (at your option) any later version.
+!
+! This program is distributed in the hope that it will 
+! be useful, but WITHOUT ANY WARRANTY; without even the 
+! implied warranty of MERCHANTABILITY or FITNESS FOR A 
+! PARTICULAR PURPOSE.  See the GNU General Public License 
+! for more details.
+!
+! You should have received a copy of the GNU General Public 
+! License along with this program; if not, write to the Free 
+! Software Foundation, Inc., 59 Temple Place - Suite 330,Boston, 
+! MA 02111-1307, USA or visit http://www.gnu.org/copyleft/gpl.txt.
+!
+subroutine LSEX_potential(q,X,Xw) 
+ !
+ ! Construct the screeend potential W(G=G'=0,q)
+ !
+ use nl_optics,           ONLY:NL_bands,Correlation,LSEX
+ use collision_ext,       ONLY:W
+ use LIVE_t,              ONLY:live_timing
+ use pars,                ONLY:SP,cZERO,pi
+ use frequency,           ONLY:w_samp,W_reset
+ use R_lattice,           ONLY:bz_samp,qindx_S
+ use D_lattice,           ONLY:DL_vol
+ use electrons,           ONLY:levels,spin_occ
+ use collision_el,        ONLY:elemental_collision,elemental_collision_free, &
+&                              elemental_collision_alloc
+ use X_m,                 ONLY:X_mat,X_ALLOC_elemental,X_t
+ use IO_int,              ONLY:io_control
+ use IO_m,                ONLY:OP_RD_CL,REP,VERIFY,NONE,RD_CL,OP_RD,RD_CL_IF_END,manage_action, &
+&                              deliver_IO_error_message         
+ use matrix_operate,      ONLY:hermitian
+ use collision_ext,       ONLY:ng_oscll
+ use stderr,              ONLY:intc
+ use parallel_int,        ONLY:PP_wait
+ !
+#include<memory.h>
+ !
+ type(bz_samp), intent(in) :: q
+ type(w_samp)              :: Xw
+ type(X_t)                 :: X
+ !
+ ! Work Space
+ !
+ integer                    :: iqbz,iqibz,iqs,iqref
+ integer                    :: io_err,ID,ig1,ig2,IO_ACT
+ integer, external          :: io_X
+ type(elemental_collision)  :: isc 
+ !
+ call PP_wait()   
+ !
+ ! The number of G in X(q,G,G') is read from input
+ !
+ X%ng      =ng_oscll
+ isc%ngrho =X%ng  
+ isc%iqref =0
+ !
+ call elemental_collision_free(isc)
+ call elemental_collision_alloc(isc,NG=X%ng,NG_GAMP=(/X%ng,X%ng/),TITLE="Wlr") 
+ !
+ call X_ALLOC_elemental('X',(/X%ng,X%ng,1/))
+ ! 
+ call live_timing('[NL] W potential :',q%nbz)
+ !
+ if(Correlation==LSEX) then
+   call io_control(ACTION=OP_RD,COM=REP,SEC=(/1/),ID=ID)
+   io_err=io_X(X,Xw,ID)
+   if (io_err<0) call error('Incomplete and/or broken tatic diel. fun. database')
+ endif
+ !
+ YAMBO_ALLOC(W,(q%nbz,ng_oscll,ng_oscll))
+ iqref=0
+ !
+!$OMP WORKSHARE 
+ W=cZERO
+!$OMP END WORKSHARE 
+ ! 
+ do iqbz=1,q%nbz
+   !
+   isc%qs(2:)=(/q%sstar(iqbz,1),q%sstar(iqbz,2)/)
+   iqibz=isc%qs(2)
+   iqs  =isc%qs(3)
+   !
+   call scatter_Gamp(isc,'x')
+   !
+   ! The bare exchange v
+   !
+!$OMP WORKSHARE 
+   forall(ig1=1:X%ng)
+     W(iqbz,ig1,ig1)=-4._SP/spin_occ*pi*isc%gamp(ig1,ig1)
+   end forall
+!$OMP END WORKSHARE 
+   !
+   if(Correlation==LSEX) then
+     !      
+     call scatter_Gamp(isc,'c')
+     !
+     if(iqibz/=iqref) then
+       !      
+       IO_ACT=manage_action(RD_CL_IF_END,iqibz,1,q%nibz)
+       call io_control(ACTION=IO_ACT,COM=NONE,SEC=(/2*iqibz,2*iqibz+1/),ID=ID)
+       io_err=io_X(X,Xw,ID)
+       !
+       call deliver_IO_error_message(io_err,'PP/Em1s',STOP_it=.TRUE.)
+       iqref=iqibz
+       !
+     endif
+     !
+     !  Screened part of the exchange  \tilde W = W - v
+     !
+!$OMP WORKSHARE 
+     forall(ig1=1:X%ng,ig2=1:X%ng)
+         W(iqbz,ig1,ig2)=W(iqbz,ig1,ig2)-4._SP/spin_occ*pi*X_mat(ig1,ig2,1)*isc%gamp(ig1,ig2)
+     end forall
+!$OMP END WORKSHARE 
+     !
+   endif
+   !
+   call live_timing(steps=1)
+   !
+ enddo
+ !
+ call live_timing()
+ !
+ call elemental_collision_free(isc)
+ call X_ALLOC_elemental('X')
+ !
+end subroutine LSEX_potential
diff --git a/src/collisions/NL_project.dep b/src/collisions/NL_project.dep
index f93c8bbabd..6174cfc54c 100644
--- a/src/collisions/NL_project.dep
+++ b/src/collisions/NL_project.dep
@@ -1,12 +1,20 @@
+ Build_LSEX_collisions.o
  COLLISIONS_HXC.o
  COLLISIONS_NEQ_GW_static.o
  COLLISIONS_alloc_and_free.o
  COLLISIONS_basic_operations.o
  COLLISIONS_compose_nl.o
+ COLLISIONS_compress.o
  COLLISIONS_eval.o
  COLLISIONS_linearize_and_IO.o
  COLLISIONS_load.o
  COLLISIONS_map_to_QP_table.o
+ LSEX_potential.o
+ OSCLL_compose_collision.o
+ OSCLL_compose_nl.o
+ OSCLL_compose_vbands.o
+ OSCLL_eval.o
+ OSCLL_load.o
  PLASMA_build_up.o
  PLASMA_parallel_setup.o
  PLASMA_tables_and_dimensions.o
diff --git a/src/collisions/OSCLL_compose_collision.F b/src/collisions/OSCLL_compose_collision.F
new file mode 100644
index 0000000000..70ef858e7e
--- /dev/null
+++ b/src/collisions/OSCLL_compose_collision.F
@@ -0,0 +1,103 @@
+!
+!        Copyright (C) 2020-2023 the Yambo team
+!
+! Authors (see AUTHORS file for details): CA
+! 
+! This file is distributed under the terms of the GNU 
+! General Public License. You can redistribute it and/or 
+! modify it under the terms of the GNU General Public 
+! License as published by the Free Software Foundation; 
+! either version 2, or (at your option) any later version.
+!
+! This program is distributed in the hope that it will 
+! be useful, but WITHOUT ANY WARRANTY; without even the 
+! implied warranty of MERCHANTABILITY or FITNESS FOR A 
+! PARTICULAR PURPOSE.  See the GNU General Public License 
+! for more details.
+!
+! You should have received a copy of the GNU General Public 
+! License along with this program; if not, write to the Free 
+! Software Foundation, Inc., 59 Temple Place - Suite 330,Boston, 
+! MA 02111-1307, USA or visit http://www.gnu.org/copyleft/gpl.txt.
+!
+subroutine OSCLL_compose_collision(k,q,i_n,i_m,i_k,COLL) 
+ !
+ ! This subroutine use long-range part of the Screened exchange
+ ! and the oscillators to reconstruct the collisions
+ ! (at present used only for testing purpose) 
+ !
+ use pars,           ONLY:SP,cZERO
+ use nl_optics,      ONLY:NL_bands,Correlation,LSEX,LHF
+ use collision_ext,  ONLY:OSCLL,W,ng_oscll
+ use QP_m,           ONLY:QP_nk
+ use R_lattice,      ONLY:bz_samp,qindx_S
+ use parallel_m,     ONLY:PAR_Xk_ibz_index
+ use wrapper,        ONLY:V_dot_V,M_by_V
+#if defined _TIMING
+ use timing_m,       ONLY:timing
+#endif
+ ! 
+ implicit none
+ !
+ type(bz_samp), intent(in)  :: k,q
+ !
+ ! Collision indexes
+ integer,       intent(in)  :: i_n,i_m,i_k
+ !
+ ! Work Space
+ !
+ integer :: ibp  ! self-energy external indexes
+ integer :: ib,iqbz,i_kmq ! self-energy internal indexes
+ integer :: ig1,ig2,ik_mem
+ complex(SP) :: V_tmp(ng_oscll)
+ !
+ ! Collisions output
+ !
+ complex(SP), intent(out) :: COLL(NL_bands(1):NL_bands(2),NL_bands(1):NL_bands(2),QP_nk)
+ !
+#if defined _TIMING
+ call timing('OSCLL compose',OPR='start')
+#endif
+ !
+ ik_mem=PAR_Xk_ibz_index(i_k)
+ !
+ ! Build Collisions from the Oscillators
+ !
+ COLL=cZERO
+ ! 
+ do iqbz=1,q%nbz
+   i_kmq   = k%sstar(qindx_S(i_k,iqbz,1),1)
+   !
+   ! Build Collision on the fly
+   !
+   if(Correlation==LSEX) then
+     !      
+     do ig1=1,ng_oscll
+       do ig2=1,ng_oscll
+!$OMP WORKSHARE	 
+	 forall(ibp=NL_bands(1):NL_bands(2),ib=NL_bands(1):NL_bands(2)) 
+           COLL(ib,ibp,i_kmq)=COLL(ib,ibp,i_kmq)+OSCLL(i_n,ib ,ik_mem,iqbz,ig2)*conjg(OSCLL(i_m,ibp,ik_mem,iqbz,ig1)) &
+& 			      *W(iqbz,ig2,ig1) 
+         end forall
+!$OMP END WORKSHARE 
+       enddo
+     enddo
+     !
+   elseif(Correlation==LHF) then
+    do ig1=1,ng_oscll
+!$OMP WORKSHARE	 
+      forall(ibp=NL_bands(1):NL_bands(2),ib=NL_bands(1):NL_bands(2)) 
+        COLL(ib,ibp,i_kmq)=COLL(ib,ibp,i_kmq)+OSCLL(i_n,ib ,ik_mem,iqbz,ig1)*conjg(OSCLL(i_m,ibp,ik_mem,iqbz,ig1)) &
+& 			      *W(iqbz,ig1,ig1) 
+      end forall
+!$OMP END WORKSHARE 
+    enddo
+   endif
+   !
+ enddo  ! loop in iqbz
+ !
+#if defined _TIMING
+ call timing('OSCLL compose',OPR='stop')
+#endif
+ !
+end subroutine OSCLL_compose_collision 
diff --git a/src/collisions/OSCLL_compose_nl.F b/src/collisions/OSCLL_compose_nl.F
new file mode 100644
index 0000000000..dfcd65b474
--- /dev/null
+++ b/src/collisions/OSCLL_compose_nl.F
@@ -0,0 +1,89 @@
+!
+!        Copyright (C) 2020-2023 the Yambo team
+!
+! Authors (see AUTHORS file for details): CA
+! 
+! This file is distributed under the terms of the GNU 
+! General Public License. You can redistribute it and/or 
+! modify it under the terms of the GNU General Public 
+! License as published by the Free Software Foundation; 
+! either version 2, or (at your option) any later version.
+!
+! This program is distributed in the hope that it will 
+! be useful, but WITHOUT ANY WARRANTY; without even the 
+! implied warranty of MERCHANTABILITY or FITNESS FOR A 
+! PARTICULAR PURPOSE.  See the GNU General Public License 
+! for more details.
+!
+! You should have received a copy of the GNU General Public 
+! License along with this program; if not, write to the Free 
+! Software Foundation, Inc., 59 Temple Place - Suite 330,Boston, 
+! MA 02111-1307, USA or visit http://www.gnu.org/copyleft/gpl.txt.
+!
+subroutine OSCLL_compose_nl(dG,H_nl_sc,k,q,E,i_k,i_sp_pol) 
+ !
+ ! This subroutine calculate the long-range part of the Screened exchange
+ !
+ use electrons,      ONLY:n_sp_pol,levels,spin_occ
+ use pars,           ONLY:SP,cI,cZERO,cONE
+ use FFT_m,          ONLY:fft_size
+ use nl_optics,      ONLY:NL_bands,NL_nbands
+ use collision_ext,  ONLY:ng_oscll,OSCLL,W
+ use QP_m,           ONLY:QP_nk
+ use R_lattice,      ONLY:bz_samp,qindx_S,nXkibz
+ use wrapper_omp,    ONLY:V_dot_V
+ use parallel_m,     ONLY:PAR_Xk_ibz_index
+#if defined _TIMING
+ use timing_m,       ONLY:timing
+#endif
+ ! 
+ implicit none
+ !
+ complex(SP), intent(inout) :: H_nl_sc(NL_bands(1):NL_bands(2),NL_bands(1):NL_bands(2))
+ integer,     intent(in)    :: i_k,i_sp_pol
+ type(bz_samp), intent(in)  :: k,q
+ type(levels),  intent(in)  :: E
+ complex(SP),  intent(in)   :: dG(NL_bands(1):NL_bands(2),NL_bands(1):NL_bands(2),QP_nk)
+ !
+ ! Work Space
+ !
+ complex(SP) :: COLL(NL_bands(1):NL_bands(2),NL_bands(1):NL_bands(2),QP_nk)
+#if defined _DOUBLE
+ complex(SP), external :: ZDOTU
+#else
+ complex(SP), external :: CDOTU
+#endif
+ integer :: i1,i2,i_n,i_m
+ !
+#if defined _TIMING
+ call timing('OSCLL compose NL',OPR='start')
+#endif
+ !
+ do i_n=NL_bands(1),NL_bands(2)
+   do i_m=i_n,NL_bands(2)
+    !
+    call OSCLL_compose_collision(k,q,i_n,i_m,i_k,COLL)
+    !
+#if defined _DOUBLE
+        H_nl_sc(i_n,i_m)=H_nl_sc(i_n,i_m)-cI*ZDOTU(NL_nbands**2*QP_nk,COLL,1,dG,1)
+#else
+        H_nl_sc(i_n,i_m)=H_nl_sc(i_n,i_m)-cI*CDOTU(NL_nbands**2*QP_nk,COLL,1,dG,1)
+#endif
+    !
+   enddo
+ enddo
+ !
+ ! Symmetrization
+ ! 
+ do i_n=NL_bands(1),NL_bands(2)
+   do i_m=i_n+1,NL_bands(2)
+     H_nl_sc(i_m,i_n)=conjg(H_nl_sc(i_n,i_m))
+   enddo
+   H_nl_sc(i_n,i_n)=real(H_nl_sc(i_n,i_n))
+ enddo
+ !
+#if defined _TIMING
+ call timing('OSCLL compose NL',OPR='stop')
+#endif
+ !
+end subroutine OSCLL_compose_nl
diff --git a/src/collisions/OSCLL_compose_vbands.F b/src/collisions/OSCLL_compose_vbands.F
new file mode 100644
index 0000000000..d83534cdf4
--- /dev/null
+++ b/src/collisions/OSCLL_compose_vbands.F
@@ -0,0 +1,123 @@
+!
+!        Copyright (C) 2020-2023 the Yambo team
+!
+! Authors (see AUTHORS file for details): CA
+! 
+! This file is distributed under the terms of the GNU 
+! General Public License. You can redistribute it and/or 
+! modify it under the terms of the GNU General Public 
+! License as published by the Free Software Foundation; 
+! either version 2, or (at your option) any later version.
+!
+! This program is distributed in the hope that it will 
+! be useful, but WITHOUT ANY WARRANTY; without even the 
+! implied warranty of MERCHANTABILITY or FITNESS FOR A 
+! PARTICULAR PURPOSE.  See the GNU General Public License 
+! for more details.
+!
+! You should have received a copy of the GNU General Public 
+! License along with this program; if not, write to the Free 
+! Software Foundation, Inc., 59 Temple Place - Suite 330,Boston, 
+! MA 02111-1307, USA or visit http://www.gnu.org/copyleft/gpl.txt.
+!
+subroutine OSCLL_compose_vbands(V_input,H_nl_sc,k,q,E,i_k,i_sp_pol) 
+ !
+ ! This subroutine calculate the long-range part of the Screened exchange
+ !
+ use electrons,      ONLY:n_sp_pol,levels,spin_occ
+ use pars,           ONLY:SP,cI,cZERO,cONE
+ use FFT_m,          ONLY:fft_size
+ use nl_optics,      ONLY:NL_bands,NL_nbands,Sigma_SEX_EQ
+ use collision_ext,  ONLY:ng_oscll,OSCLL,W
+ use QP_m,           ONLY:QP_nk
+ use R_lattice,      ONLY:bz_samp,qindx_S,nXkibz
+ use wrapper_omp,    ONLY:Vstar_dot_V_omp
+ use parallel_m,     ONLY:PAR_Xk_ibz_index
+#if defined _TIMING
+ use timing_m,       ONLY:timing
+#endif
+ ! 
+ implicit none
+ !
+ complex(SP), intent(inout) :: H_nl_sc(NL_bands(1):NL_bands(2),NL_bands(1):NL_bands(2))
+ integer,     intent(in)    :: i_k,i_sp_pol
+ type(bz_samp), intent(in)  :: k,q
+ type(levels),  intent(in)  :: E
+ complex(SP),  intent(in)  :: V_input(NL_bands(2),maxval(E%nbf),QP_nk,n_sp_pol)
+ !
+ ! Work Space
+ !
+ complex(SP) :: COLL(NL_bands(1):NL_bands(2),NL_bands(1):NL_bands(2),QP_nk)
+ complex(SP) :: V_kmq(NL_bands(1):NL_bands(2),maxval(E%nbf),q%nbz)
+ complex(SP) :: OSCLL_val(maxval(E%nbf),NL_bands(1):NL_bands(2),ng_oscll,q%nbz)
+ complex(SP) :: OSCLL_sum(q%nbz,ng_oscll,ng_oscll)
+ ! 
+#if defined _DOUBLE
+ complex(SP), external :: ZDOTU
+#else
+ complex(SP), external :: CDOTU
+#endif
+ complex(SP) :: COLL_val
+ integer :: i_kmq,i_n,i_m,iv,iqbz,ig1,ig2,ik_mem
+ !
+ ik_mem=PAR_Xk_ibz_index(i_k)
+ !
+#if defined _TIMING
+ call timing('OSCLL compose NL',OPR='start')
+#endif
+ !
+!$omp parallel do default(shared), private(iqbz,i_kmq)
+ do iqbz=1,q%nbz
+   i_kmq           = k%sstar(qindx_S(i_k,iqbz,1),1)
+   V_kmq(:,:,iqbz) = V_input(NL_bands(1):,:,i_kmq,i_sp_pol)
+ enddo
+!$omp end parallel do
+ !
+ ! Build Oscillators for the time-dependent valence bands
+ !
+!$OMP WORKSHARE
+forall(iv=1:E%nbf(i_sp_pol),iqbz=1:q%nbz,i_n=NL_bands(1):NL_bands(2),ig1=1:ng_oscll)
+   OSCLL_val(iv,i_n,ig1,iqbz)=sum(OSCLL(i_n,:,ik_mem,iqbz,ig1)*V_kmq(NL_bands(1):,iv,iqbz))
+ end forall
+!$OMP END WORKSHARE
+ !
+ ! Remove equilibrium Sigma-sex
+ !
+!$OMP WORKSHARE
+ H_nl_sc=H_nl_sc-Sigma_SEX_EQ(:,:,i_sp_pol,ik_mem)
+!$OMP END WORKSHARE
+ !
+ do i_n=NL_bands(1),NL_bands(2)
+   do i_m=i_n,NL_bands(2)
+    !
+!$OMP WORKSHARE
+    forall(iqbz=1:q%nbz,ig1=1:ng_oscll,ig2=1:ng_oscll)
+      OSCLL_sum(iqbz,ig1,ig2)=dot_product(OSCLL_val(:,i_m,ig2,iqbz),OSCLL_val(:,i_n,ig2,iqbz))
+    end forall
+!$OMP END WORKSHARE
+    !
+#if defined _DOUBLE
+    H_nl_sc(i_n,i_m)=H_nl_sc(i_n,i_m)+spin_occ*ZDOTU(q%nbz*ng_oscll**2,OSCLL_sum,1,W,1)
+#else
+    H_nl_sc(i_n,i_m)=H_nl_sc(i_n,i_m)+spin_occ*CDOTU(q%nbz*ng_oscll**2,OSCLL_sum,1,W,1)
+#endif
+    !
+   enddo
+ enddo
+ !
+ ! Symmetrization
+ ! 
+!$omp parallel do default(shared), private(i_n,i_m)
+ do i_n=NL_bands(1),NL_bands(2)
+   do i_m=i_n+1,NL_bands(2)
+     H_nl_sc(i_m,i_n)=conjg(H_nl_sc(i_n,i_m))
+   enddo
+   H_nl_sc(i_n,i_n)=real(H_nl_sc(i_n,i_n))
+ enddo
+!$omp end parallel do
+ !
+#if defined _TIMING
+ call timing('OSCLL compose NL',OPR='stop')
+#endif
+ !
+end subroutine OSCLL_compose_vbands
diff --git a/src/collisions/OSCLL_eval.F b/src/collisions/OSCLL_eval.F
new file mode 100644
index 0000000000..3de61a57da
--- /dev/null
+++ b/src/collisions/OSCLL_eval.F
@@ -0,0 +1,215 @@
+!
+!        Copyright (C) 2000-2022 the YAMBO team
+!              http://www.yambo-code.org
+!
+! Authors (see AUTHORS file for details): CA
+! 
+! This file is distributed under the terms of the GNU 
+! General Public License. You can redistribute it and/or 
+! modify it under the terms of the GNU General Public 
+! License as published by the Free Software Foundation; 
+! either version 2, or (at your option) any later version.
+!
+! This program is distributed in the hope that it will 
+! be useful, but WITHOUT ANY WARRANTY; without even the 
+! implied warranty of MERCHANTABILITY or FITNESS FOR A 
+! PARTICULAR PURPOSE.  See the GNU General Public License 
+! for more details.
+!
+! You should have received a copy of the GNU General Public 
+! License along with this program; if not, write to the Free 
+! Software Foundation, Inc., 59 Temple Place - Suite 330,Boston, 
+! MA 02111-1307, USA or visit http://www.gnu.org/copyleft/gpl.txt.
+!
+subroutine OSCLL_eval(q,k,en) 
+ !
+ ! Evaluate oscillators for the Local-SEX
+ !
+ use LIVE_t,              ONLY:live_timing
+ use pars,                ONLY:SP,cZERO
+ use R_lattice,           ONLY:bz_samp,qindx_S
+ use electrons,           ONLY:levels,n_sp_pol
+ use wave_func,           ONLY:WF,wf_ng
+ use interfaces,          ONLY:WF_load,WF_free
+ use QP_m,                ONLY:QP_nk
+ use stderr,              ONLY:intc
+ use collision_el,        ONLY:elemental_collision,elemental_collision_free, &
+&                              elemental_collision_alloc         
+ use collision_ext,       ONLY:COLL_bands,W,OSCLL_k,ng_oscll
+ use parallel_m,          ONLY:PAR_IND_Xk_ibz,PAR_IND_Xk_ibz_ID
+ use FFT_m,               ONLY:fft_size
+ use IO_int,              ONLY:io_control,IO_and_Messaging_switch
+ use IO_m,                ONLY:OP_RD_CL,VERIFY,REP,OP_WR_CL,OP_APP_CL,file_is_present,rm_file,DUMP, &
+&                              OP_IF_START_APP_CL_IF_END,manage_action 
+ use com,                 ONLY:msg
+ use parallel_int,        ONLY:PP_redux_wait,PP_wait
+ use parallel_m,          ONLY:PAR_IND_Xk_ibz,PAR_COM_Xk_ibz_INDEX,master_cpu,myid,PAR_IND_G_b,master_cpu, &
+&                              PAR_Xk_nibz,PAR_Xk_ibz_index,PAR_COM_Xk_ibz_A2A,PAR_COM_G_b_INDEX,myid
+ !
+#include<dev_defs.h>
+#include<memory.h>
+ !  
+ type(bz_samp), intent(in) :: q,k
+ type(levels),  intent(in) :: en
+ !
+ ! Work Space
+ !
+ type(elemental_collision)            :: isc 
+ integer :: i_k,i_m,i_n,i_q,iqbz,i_kmq_G0,iqs,i_kmq,i_kmq_s,i_k_s,ik_mem
+ integer :: i_k_sp,i_kmq_sp,iqibz,ig,i_np,i_mp,io,ic,i_sp_pol,ib_idx
+ integer :: ID,io_err,iGo_max,i_fragment,max_count,i_count,IO_ACT
+ integer :: COLL_nbands,OSCLL_perc
+ logical, allocatable :: OSCLL_done(:)
+ integer, external :: io_OSCLL
+ logical :: IO_do_it
+ !
+ call PP_wait() 
+ !
+ i_sp_pol=1
+ !
+ call IO_and_Messaging_switch("SAVE")
+ call IO_and_Messaging_switch("+io_in",CONDITION=.TRUE.)
+ !   
+ call msg('s','[OSC] Checking oscillators header ')
+ call io_control(ACTION=OP_RD_CL,COM=REP,SEC=(/1/),MODE=VERIFY,ID=ID)
+ io_err=io_OSCLL(q,ID)
+ ! 
+ YAMBO_ALLOC(OSCLL_done,(QP_nk))
+ OSCLL_done=.FALSE.
+ if(io_err==0)  then
+   !
+   ! Check if all oscillators has been evaluated
+   !
+   if(master_cpu) then
+     !
+     do i_k=1,QP_nk
+       if(file_is_present('OSCLL_fragment_'//intc(i_k))) OSCLL_done(i_k)=.TRUE.
+     enddo                                                                    
+     !	    
+   endif
+   !
+   call PP_redux_wait(OSCLL_done,imode=3)
+   !    
+   if(all(OSCLL_done.eqv..TRUE.)) then 
+     return
+   else
+     OSCLL_perc=nint(count(OSCLL_done)/real(QP_nk)*100)
+     call msg('s','Restarting Oscillators calculation : '//trim(intc(OSCLL_perc))//'% done')
+   endif
+   !
+ else
+    call msg('s','[OSC] Missing or broken oscillators ')
+ endif
+ !
+ ! Oscillators not found, recalculated them
+ !
+ if(master_cpu) then
+   call io_control(ACTION=OP_WR_CL,COM=REP,SEC=(/1/),MODE=VERIFY,ID=ID)
+   io_err=io_OSCLL(q,ID)
+ endif
+ !
+ max_count=120
+ do while(io_err/=0)
+   call io_control(ACTION=OP_RD_CL,COM=REP,SEC=(/1/),MODE=DUMP,ID=ID)
+   io_err=io_OSCLL(q,ID)
+   if(io_err/=0) call sleep(1)
+   i_count=i_count+1
+   if((i_count==max_count).and.io_err/=0) &
+&      call error(" [OSCLL] Header not found while writing dipoles ")
+ enddo
+ !
+ call PP_wait()
+ !
+ iGo_max=maxval(qindx_S(:,:,2)) 
+ call WF_load(WF,wf_ng,iGo_max,COLL_bands,(/1,k%nibz/),space='R',title='-OSC')  
+ !
+ call elemental_collision_free(isc)
+ !
+ isc%iqref =0
+ isc%ngrho =ng_oscll  ! only G=1 is required for local-SEX
+ !
+ call elemental_collision_alloc(isc,NG=isc%ngrho,TITLE="OSCLL")
+ !
+ call live_timing('[OSC] Oscillators :',PAR_IND_Xk_ibz%n_of_elements(PAR_IND_Xk_ibz_ID+1))
+ !
+ YAMBO_ALLOC(OSCLL_k,(COLL_bands(1):COLL_bands(2),COLL_bands(1):COLL_bands(2),q%nbz,ng_oscll))
+ !
+ COLL_nbands=COLL_bands(2)-COLL_bands(1)+1
+ !
+ do i_k=1,QP_nk
+   !
+   if (.not.PAR_IND_Xk_ibz%element_1D(i_k)) cycle
+   !
+   if(OSCLL_done(i_k)) then
+     call live_timing(steps=1)
+     cycle
+   endif
+
+   ! 
+   ik_mem=PAR_Xk_ibz_index(i_k)
+   !
+   OSCLL_k=cZERO
+   !
+   do i_n=COLL_bands(1),COLL_bands(2)
+     do i_m=COLL_bands(1),COLL_bands(2)
+       !
+       ib_idx=i_m-COLL_bands(1)+1+(i_n-COLL_bands(1))*COLL_nbands
+       !
+       if (.not.PAR_IND_G_b%element_1D(ib_idx)) cycle
+       !
+       do iqbz=1,q%nbz
+         !
+         i_kmq_G0  =qindx_S(i_k,iqbz,2)
+         iqibz     =q%sstar(iqbz,1)
+         iqs       =q%sstar(iqbz,2)
+         isc%qs  =(/i_kmq_G0,iqibz,iqs/)
+         !
+         i_k_s=1
+         i_kmq   = k%sstar(qindx_S(i_k,iqbz,1),1)
+         i_kmq_s = k%sstar(qindx_S(i_k,iqbz,1),2)
+         !
+         !  Spin-Polarization not supported!!
+         ! 
+         i_k_sp  = 1
+         i_kmq_sp= 1
+         !
+         !
+         isc%is  =(/i_n,i_k,i_k_s,i_k_sp/)
+         isc%os  =(/i_m,i_kmq,i_kmq_s,i_kmq_sp/)
+         call DEV_SUB(scatter_Bamp)(isc)            
+         !
+!$omp parallel do default(shared), private(ig)
+         do ig=1,ng_oscll
+           OSCLL_k(i_n,i_m,iqbz,ig)=isc%rhotw(ig)
+         enddo
+!$omp end parallel do
+         !
+       enddo
+       !
+    enddo
+   enddo
+   !
+   call PP_redux_wait(OSCLL_k(:,:,:,:),COMM=PAR_COM_G_b_INDEX%COMM)
+   !
+   i_fragment=i_k+(i_sp_pol-1)*k%nibz
+   !
+   IO_ACT=OP_APP_CL !manage_action(OP_IF_START_APP_CL_IF_END,i_fragment,1,k%nibz*n_sp_pol)
+   call io_control(ACTION=IO_ACT,COM=REP,SEC=(/1+i_fragment/),MODE=VERIFY,ID=ID)
+   io_err=io_OSCLL(q,ID)
+   !
+   call live_timing(steps=1)
+   !
+ enddo
+ !
+ call IO_and_Messaging_switch("RESTORE")
+ !
+ call live_timing()
+ !
+ YAMBO_FREE(OSCLL_k)
+ !
+ call elemental_collision_free(isc)
+ call WF_free(WF)  
+ !
+ call PP_wait() 
+ !
+end subroutine OSCLL_eval
diff --git a/src/collisions/OSCLL_load.F b/src/collisions/OSCLL_load.F
new file mode 100644
index 0000000000..ae5a78d733
--- /dev/null
+++ b/src/collisions/OSCLL_load.F
@@ -0,0 +1,80 @@
+!
+!        Copyright (C) 2000-2022 the YAMBO team
+!              http://www.yambo-code.org
+!
+! Authors (see AUTHORS file for details): CA
+! 
+! This file is distributed under the terms of the GNU 
+! General Public License. You can redistribute it and/or 
+! modify it under the terms of the GNU General Public 
+! License as published by the Free Software Foundation; 
+! either version 2, or (at your option) any later version.
+!
+! This program is distributed in the hope that it will 
+! be useful, but WITHOUT ANY WARRANTY; without even the 
+! implied warranty of MERCHANTABILITY or FITNESS FOR A 
+! PARTICULAR PURPOSE.  See the GNU General Public License 
+! for more details.
+!
+! You should have received a copy of the GNU General Public 
+! License along with this program; if not, write to the Free 
+! Software Foundation, Inc., 59 Temple Place - Suite 330,Boston, 
+! MA 02111-1307, USA or visit http://www.gnu.org/copyleft/gpl.txt.
+!
+subroutine OSCLL_load(q,k) 
+ !
+ ! Evaluate oscillators for the Local-SEX
+ !
+ use LIVE_t,              ONLY:live_timing
+ use R_lattice,           ONLY:bz_samp
+ use QP_m,                ONLY:QP_nk
+ use parallel_m,          ONLY:PAR_IND_Xk_ibz,PAR_IND_Xk_ibz_ID,myid,PAR_COM_Xk_ibz_INDEX,master_cpu
+ use parallel_int,        ONLY:PP_wait
+ use IO_m,                ONLY:OP_RD,RD_CL_IF_END,VERIFY,REP,manage_action,deliver_IO_error_message
+ use IO_int,              ONLY:io_control,IO_and_Messaging_switch
+ use com,                 ONLY:msg
+ !
+#include<memory.h>
+ !
+ type(bz_samp), intent(in) :: q,k
+ !
+ ! Work Space
+ !
+ integer :: i_k,i_sp_pol
+ integer :: ID,io_err,IO_ACT
+ integer, external :: io_OSCLL
+ !
+ call PP_wait(COMM=PAR_COM_Xk_ibz_INDEX%COMM) 
+ !
+ i_sp_pol=1
+ !
+ call IO_and_Messaging_switch("SAVE")
+ call IO_and_Messaging_switch("+io_in",CONDITION=.TRUE.)
+ !   
+ call msg('s','[OSC] Checking oscillators header ')
+ call io_control(ACTION=OP_RD,COM=REP,SEC=(/1/),MODE=VERIFY,ID=ID)
+ io_err=io_OSCLL(q,ID)
+ !
+ if(io_err/=0) &
+&  call error('Oscillators not present, evaluate them before non-linear reponse: "yambo -e -v lsex" ')
+ ! 
+ ! Load Oscillators
+ !
+ call live_timing('[OSC] Load Oscillators :',PAR_IND_Xk_ibz%n_of_elements(PAR_IND_Xk_ibz_ID+1))
+ !
+ do i_k=1,QP_nk
+   if (.not.PAR_IND_Xk_ibz%element_1D(i_k)) cycle
+   IO_ACT=manage_action(RD_CL_IF_END,i_k,1,(i_sp_pol)*k%nibz)
+   call io_control(ACTION=IO_ACT,COM=REP,SEC=(/1+i_k+(i_sp_pol-1)*k%nibz/),ID=ID)
+   io_err=io_OSCLL(q,ID)
+   call deliver_IO_error_message(io_err,'Oscillators',STOP_it=.TRUE.)
+   call live_timing(steps=1)
+ enddo
+ call live_timing()
+ !
+ call PP_wait(COMM=PAR_COM_Xk_ibz_INDEX%COMM) 
+ call IO_and_Messaging_switch("RESTORE")
+ !
+ call msg('rs','[OSC] Oscillators loaded')
+ !
+end subroutine OSCLL_load
diff --git a/src/common/eval_G_minus_G.F b/src/common/eval_G_minus_G.F
index 52383bedad..1e8ee8e9d3 100644
--- a/src/common/eval_G_minus_G.F
+++ b/src/common/eval_G_minus_G.F
@@ -18,7 +18,7 @@ integer function eval_G_minus_G(iG,iGo,COMM)
  use vec_operate,  ONLY:iku_v_norm
  use parallel_m,   ONLY:yMPI_comm,PP_indexes,PP_indexes_reset,myid
  use parallel_int, ONLY:PP_redux_wait,PARALLEL_index,PARALLEL_live_message
- use R_lattice,    ONLY:G_m_G,g_vec,ng_in_shell,n_g_shells,E_of_shell
+ use R_lattice,    ONLY:G_m_G,g_vec,ng_in_shell,n_g_shells,E_of_shell,G_m_G_maxval
 #ifdef _CUDA
  use R_lattice,    ONLY:G_m_G_d
 #endif
@@ -34,6 +34,7 @@ integer function eval_G_minus_G(iG,iGo,COMM)
  integer :: iG_shell,iGo_shell,iG_shell_0,iG_shell_max,iG_,iGo_,iG_alloc,iGo_alloc
  real(SP):: E_iG, E_iGo, E_max
  real(SP):: v1(3)
+ integer,  allocatable :: G_m_G_maxval_tmp(:)
  real(SP), allocatable :: E_G_m_G(:,:)
  type(PP_indexes) :: PAR_IND_G
  !
@@ -146,6 +147,9 @@ integer function eval_G_minus_G(iG,iGo,COMM)
  !
  YAMBO_ALLOC(G_m_G,(iG_,iGo_))
  G_m_G=0
+ YAMBO_ALLOC(G_m_G_maxval_tmp,(iGo_))
+ G_m_G_maxval_tmp=0
+ G_m_G_maxval=0
  !
  !$omp parallel do default(shared), private(i2,i1,v1), schedule(dynamic)
  do i2=1,iGo_
@@ -159,11 +163,15 @@ integer function eval_G_minus_G(iG,iGo,COMM)
      !
      v1(:)=g_vec(i1,:)-g_vec(i2,:)
      G_m_G(i1,i2)=G_index(v1,.false.)
+     G_m_G_maxval_tmp(i2)=max(G_m_G_maxval_tmp(i2),G_m_G(i1,i2))
      !
    enddo
  enddo
  !$omp end parallel do
- ! 
+ !
+ G_m_G_maxval=maxval(G_m_G_maxval_tmp)
+ YAMBO_FREE(G_m_G_maxval_tmp)
+ !
  if (present(COMM)) call PP_redux_wait(G_m_G,COMM=COMM%COMM)
  !
  call PP_indexes_reset(PAR_IND_G)
diff --git a/src/driver/PHEL_project.dep b/src/driver/PHEL_project.dep
deleted file mode 100644
index e91fccffa5..0000000000
--- a/src/driver/PHEL_project.dep
+++ /dev/null
@@ -1,3 +0,0 @@
- options_projects.o
- options_yambo.o
-
diff --git a/src/el-ph/RT_project.dep b/src/el-ph/RT_project.dep
index bbc20f4200..2455333893 100644
--- a/src/el-ph/RT_project.dep
+++ b/src/el-ph/RT_project.dep
@@ -1,6 +1,2 @@
- ELPH_acoustic_phonon_properties.o
- ELPH_databases_check.o
- ELPH_databases_grids_map.o
- ELPH_databases_load.o
- ELPH_databases_symmetrize.o
+ ELPH_Sigma_c.o
 
diff --git a/src/hamiltonian/V_Hartree.F b/src/hamiltonian/V_Hartree.F
index ada08038dd..37e5c6c9f8 100644
--- a/src/hamiltonian/V_Hartree.F
+++ b/src/hamiltonian/V_Hartree.F
@@ -35,9 +35,11 @@ subroutine V_Hartree(rho,vhr)
  !
  call timing('V_Hartree',OPR='start')
  !
+!$OMP WORKSHARE  
  vhg   = (0._DP,0._DP)
  vhtmp = (0._DP,0._DP)
  rhog  = rho
+!$OMP END WORKSHARE  
  !
  ! FFT call: rho in reciprocal space
  !
@@ -71,7 +73,9 @@ subroutine V_Hartree(rho,vhr)
  call fft_3d(vhtmp,fft_dim,1)
 #endif
  !
+!$OMP WORKSHARE  
  vhr(:,1) = real(vhtmp,SP)
+ !$OMP END WORKSHARE  
  if(n_spin==2) vhr(:,2)=vhr(:,1)
  !
  call timing('V_Hartree',OPR='stop')
diff --git a/src/hamiltonian/XC_potentials.F b/src/hamiltonian/XC_potentials.F
index 928e5cd2fb..772f133d07 100644
--- a/src/hamiltonian/XC_potentials.F
+++ b/src/hamiltonian/XC_potentials.F
@@ -10,7 +10,8 @@ subroutine XC_potentials(POTENTIALS_string)
  use pars,           ONLY:schlen
  use drivers,        ONLY:l_sc_ip,l_sc_hartree,l_sc_exx,l_sc_fock,l_sc_coh,l_sc_sex,l_sc_is_libDFT,&
 &                         l_sc_srpa,l_use_Hxc_collisions,l_use_COH_collisions,l_eval_collisions
- use global_XC,      ONLY:H_SE_EXX,H_SE_FOCK,H_SE_COH,H_SE_SEX,H_SE_EXXC,H_SE_SRPA,H_SE_HARTREE
+ use global_XC,      ONLY:H_SE_EXX,H_SE_FOCK,H_SE_COH,H_SE_SEX,H_SE_EXXC,H_SE_SRPA,H_SE_HARTREE, &
+&                         H_SE_LSEX,H_SE_LHF
  use hamiltonian,    ONLY:H_kind,H_xc_functional,l_sc_XC_is_local,l_sc_V_is_local
  use xc_functionals, ONLY:XC_LDA_X,XC_LDA_C_PZ,XC_EXCHANGE_CORRELATION,XC_EXCHANGE,&
 &                         GS_xc_KIND,GS_xc_FUNCTIONAL
@@ -140,6 +141,14 @@ subroutine XC_potentials(POTENTIALS_string)
        l_use_Hxc_collisions=.TRUE.
      endif
      H_kind=H_kind+H_SE_SEX
+   else if (STRING_same(trim(potentials(i_c)),'LSEX')) then
+     l_sc_V_is_local=.false.
+     l_sc_XC_is_local=.false.
+     H_kind=H_kind+H_SE_LSEX
+   else if (STRING_same(trim(potentials(i_c)),'LHF')) then
+     l_sc_V_is_local=.false.
+     l_sc_XC_is_local=.false.
+     H_kind=H_kind+H_SE_LHF
    else if (STRING_same(trim(potentials(i_c)),'COHSEX')) then
      l_sc_coh=.true.
      l_sc_sex=.true.
diff --git a/src/interface/INIT.F b/src/interface/INIT.F
index 2b27665b4f..4a0270d393 100644
--- a/src/interface/INIT.F
+++ b/src/interface/INIT.F
@@ -482,10 +482,6 @@ subroutine LOCAL_setup_before_input_IO()
    end subroutine LOCAL_setup_before_input_IO
    !
    subroutine LOCAL_after_input_IO
-     !
-     ! q0 renormalization
-     !
-     BSS_q0(:) = BSS_q0(:)*q0_def_norm/v_norm( BSS_q0)
      !
      if (len_trim(BSE_mode)==0                ) BSE_mode="retarded"
      if (STRING_same(trim(BSE_mode),"causal") ) BSE_mode="retarded"
diff --git a/src/interface/INIT_activate.F b/src/interface/INIT_activate.F
index e3c52c1184..b4a7293536 100644
--- a/src/interface/INIT_activate.F
+++ b/src/interface/INIT_activate.F
@@ -104,7 +104,8 @@ subroutine INIT_activate()
  if (l_nl_optics) then
    call CPU_activate("NL")
    call CPU_activate("DIP")
-   call initactivate(1,'DIP_Threads NL_Threads')
+   call CPU_activate("OSCLL")
+   call initactivate(1,'DIP_Threads NL_Threads OSCLL_Threads')
  endif
 #endif
  !
@@ -122,7 +123,7 @@ subroutine INIT_activate()
  !
  !Setup
  !
- if (l_setup) call initactivate(1,'MaxGvecs Gthresh K_grids IkSigLim IkXLim NoDiagSC')
+ if (l_setup) call initactivate(1,'MaxGvecs Gthresh K_grids IkSigLim IkXLim NoDiagSC EvalMagn')
 #if defined  _ELPH 
  if (l_setup) call initactivate(1,'BSEscatt')
 #endif
@@ -205,7 +206,7 @@ subroutine INIT_activate()
    !
    ! Dipoles & Gauges
    !
-   call initactivate(1,'DipApproach DipComputed DipPDirect ShiftedPaths Gauge AnHall')
+   call initactivate(1,'DipApproach DipComputed DipPDirect ShiftedPaths Gauge')
    !
  endif
  !
@@ -227,7 +228,8 @@ subroutine INIT_activate()
    ! Special case: The BSE equation is used to build up the BSE_Fxc kernel.
    if (l_bs_fxc) then
      call initactivate(1,'FxcGRLc FxcSVdig FxcRetarded FxcMEStps')
-     call initactivate(1,'BLongDir BEnRange BDmRange BEnSteps')
+     call initactivate(1,'BLongDir QPropDir BEnRange BDmRange BEnSteps')
+     !call initactivate(1,'EFieldDir QPropDir BEnRange BDmRange BEnSteps')
    endif
    !
  endif
@@ -243,7 +245,7 @@ subroutine INIT_activate()
    call INIT_RT_ctl_switch('R')
 #endif
    !
-   call initactivate(1,'NoCondSumRule MetDamp AnHall')
+   call initactivate(1,'NoCondSumRule MetDamp')
    !
 #if defined _NL || defined _SC
    call initactivate(1,'EvPolarization FrSndOrd')
@@ -265,7 +267,8 @@ subroutine INIT_activate()
  !
  if (l_optics.and.l_bse.and.l_bss.or.(l_bse.and.l_rpa_IP)) then
    !
-   call initactivate(1,'BEnRange BDmRange BDmERef BEnSteps BLongDir')
+   call initactivate(1,'BEnRange BDmRange BDmERef BEnSteps BLongDir QPropDir')
+   !call initactivate(1,'BEnRange BDmRange BDmERef BEnSteps EFieldDir QPropDir')
    call initactivate(1,'BSEprop BSEdips')
    !
    ! Special case: the BSE_Fxc kernel has been constructed
@@ -549,9 +552,10 @@ subroutine INIT_activate()
 #if defined _NL
  if (l_nl_optics) then
    call initactivate(1,'FFTGvecs NLBands NLverbosity NLstep NLtime NLintegrator NLCorrelation NLLrcAlpha')
+   call initactivate(1,'TestOSCLL')
    if(.not.l_nl_p_and_p) call initactivate(1,'NLEnRange NLEnSteps NLrotaxis NLAngSteps')
    call initactivate(1,'NLDamping RADLifeTime UseDipoles FrSndOrd NoComprCOLL EvalCurrent InducedField FrPolPerdic') 
-   call initactivate(1,'Gauge RADLifeTime HARRLvcs EXXRLvcs') 
+   call initactivate(1,'TestOSCLL Gauge RADLifeTime HARRLvcs EXXRLvcs CORRLvcs') 
    call init_QP_ctl_switch('G')
  endif
  !
diff --git a/src/interface/INIT_load.F b/src/interface/INIT_load.F
index 54a3779ebb..fd1f595448 100644
--- a/src/interface/INIT_load.F
+++ b/src/interface/INIT_load.F
@@ -35,7 +35,7 @@ subroutine INIT_load(defs,en,q,k,X,Xw,Dip)
  use BS,            ONLY:BSE_mode,BSE_prop,BSK_mode,BS_eh_en,BS_eh_win,BS_q,BS_bands,&
 &                        BS_n_g_W,BS_n_g_exch,BS_n_g_fxc,BSE_L_kind,BS_K_cutoff,BSK_IO_mode,BSE_dipole_geometry
  use BS_solvers,    ONLY:BSS_mode,Haydock_threshold,Haydock_iterIO,BSS_n_freqs,&
-&                        BSS_dr,BSS_er,BSS_q0,BSS_damp_reference,BSS_inversion_mode,&
+&                        BSS_dr,BSS_er,BSS_E_dir,BSS_Q_dir,BSS_damp_reference,BSS_inversion_mode,&
 &                        BSS_Wd,K_INV_EPS,K_INV_PL,BSS_n_eig,Haydock_iterMAX
 #if defined _SLEPC && !defined _NL
  use BS_solvers,    ONLY:BSS_slepc_target_E,BSS_slepc_extraction,BSS_slepc_ncv,BSS_slepc_tol,BSS_slepc_maxit,&
@@ -52,7 +52,7 @@ subroutine INIT_load(defs,en,q,k,X,Xw,Dip)
 #endif
 #if defined _SC || defined _RT || defined _QED
  use hamiltonian,   ONLY:H_ref_bands,H_potential
- use collision_ext, ONLY:COLLISIONS_cutoff,COLL_bands
+ use collision_ext, ONLY:COLLISIONS_cutoff,COLL_bands,ng_oscll
 #endif
  use QP_m,          ONLY:SC_E_threshold
 #if defined _SC
@@ -94,7 +94,8 @@ subroutine INIT_load(defs,en,q,k,X,Xw,Dip)
 &                               RES_tresh,FAN_deltaE_treshold,DW_deltaE_treshold,E_kpq_sh_fact,gkkp_db
 #endif
 #if defined _OPENMP
- use openmp,        ONLY:n_threads_X,n_threads_SE,n_threads_RT,n_threads_DIP,n_threads_K,n_threads_NL
+ use openmp,        ONLY:n_threads_X,n_threads_SE,n_threads_RT,n_threads_DIP,n_threads_K,n_threads_NL, &
+&                        n_threads_OSCLL
 #endif
 #if defined _SCALAPACK
  use SLK_m,         ONLY:SLK_test_H_dim
@@ -208,6 +209,7 @@ subroutine INIT_load(defs,en,q,k,X,Xw,Dip)
  call it(defs,'K_Threads',       '[OPENMP/BSK] Number of threads for response functions',n_threads_K)
  call it(defs,'X_Threads',       '[OPENMP/X] Number of threads for response functions',n_threads_X)
  call it(defs,'DIP_Threads',     '[OPENMP/X] Number of threads for dipoles',n_threads_DIP)
+ call it(defs,'OSCLL_Threads',    '[OPENMP/X] Number of threads for Oscillators',n_threads_OSCLL)
  call it(defs,'SE_Threads',      '[OPENMP/GW] Number of threads for self-energy',n_threads_SE)
  call it(defs,'RT_Threads',      '[OPENMP/RT] Number of threads for real-time',n_threads_RT)
  call it(defs,'NL_Threads',      '[OPENMP/NL] Number of threads for nl-optics',n_threads_NL)
@@ -296,12 +298,14 @@ subroutine INIT_load(defs,en,q,k,X,Xw,Dip)
  call it('f',defs,'NoCondSumRule' ,'[BSE/X] Do not impose the conductivity sum rule in velocity gauge',verb_level=V_resp)
  call it('f',defs,'MetDamp' ,      '[BSE] Define '//slash//'w+=sqrt('//slash//'w*('//slash//'w+i'//slash//'eta))',verb_level=V_resp)
  call it(defs,'BSSmod',    '[BSS] (h)aydock/(d)iagonalization/(s)lepc/(i)nversion/(t)ddft`',BSS_mode,protect=.FALSE.)
- call it(defs,'BSEprop',   '[BSS] Can be any among abs/jdos/kerr/magn/dich/photolum/esrt',BSE_prop)
+ call it(defs,'BSEprop',   '[BSS] Can be any among abs/jdos/kerr/asymm/anHAll/magn/dich/photolum/esrt',BSE_prop)
  call it(defs,'BSEdips',   '[BSS] Can be "trace/none" or "xy/xz/yz" to define off-diagonal rotation plane',BSE_dipole_geometry)
  call it(defs,'BSSInvMode','[BSS] Inversion solver modality `(f)ull/(p)erturbative`',BSS_inversion_mode)
  call it(defs,'BSSInvPFratio','[BSS] Inversion solver. Ratio between the number of frequencies solved pert/full',&
 &             K_INV_EPS%PERT_FULL_ratio)
- call it(defs,'BLongDir',  '[BSS] [cc] Electric Field',BSS_q0)
+ call it(defs,'BLongDir',  '[BSS] [cc] Electric Field versor',BSS_E_dir)
+ !call it(defs,'EFieldDir',  '[BSS] [cc] Electric Field versor',BSS_E_dir)
+ call it(defs,'QPropDir',   '[BSS] [cc] Propagation versor Field',BSS_Q_dir,verb_level=V_resp)
  call it(defs,'BEnRange',  '[BSS] Energy range',BSS_er,E_unit)
  call it(defs,'BDmRange',  '[BSS] Damping range',BSS_dr,E_unit)
  call it(defs,'BSHayTrs',  '[BSS] Relative [o/o] Haydock threshold. Strict(>0)/Average(<0)',Haydock_threshold)
@@ -356,7 +360,6 @@ subroutine INIT_load(defs,en,q,k,X,Xw,Dip)
  !
  ! BSE properties: kerr, magnons, absorption, ...
  !
- call it('f',defs,'AnHall'  , '[BSE] Add the anomalous Hall effect to eps if using length gauge',verb_level=V_resp)
  call it(defs,'PL_weights','[PL] [cc] Weights of the carthesian components of the emitted radiation',PL_weights,verb_level=V_resp)
  !
 #if defined _RT
@@ -412,7 +415,7 @@ subroutine INIT_load(defs,en,q,k,X,Xw,Dip)
  !
  call it(defs,'TwoAlpha',   '[RT] C_nk ~ alpha*Gamma_nk^2 ',two_alpha,Verb_level=V_real_time)
  call it(defs,'GrKind',     '[RT] G-ret kind: Lorentzian (QP)/ Hyperbolic QP_secant (HS)',Gr_kind,Verb_level=V_real_time)
- call it(defs,'RADLifeTime','[RT] Radiative life-time (if negative Yambo sets it equal to Phase_LifeTime in NL)', &
+ call it(defs,'RADLifeTime','[RT] Radiative life-time (if negative RADLifeTime=Phase_LifeTime)', &
 &         RAD_LifeTime,unit=Time_unit(1))
  call it(defs,'RADmagnific','[RT] Radiative life-time magnification',RAD_magnification,Verb_level=V_real_time)
  call it(defs,'PhLifeTime', '[RT] Constant Dephasing Time',Phase_LifeTime,unit=Time_unit(1))
@@ -513,7 +516,7 @@ subroutine INIT_load(defs,en,q,k,X,Xw,Dip)
  call it(defs,'NLstep',       '[NL] Time step length',RT_step,unit=Time_unit(1),Verb_level=V_nl_optics)
  call it(defs,'NLtime',       '[NL] Simulation Time',NE_tot_time,unit=Time_unit(1))
  call it(defs,'NLintegrator', '[NL] Integrator ("EULEREXP/RK2/RK4/RK2EXP/HEUN/INVINT/CRANKNIC")',Integrator_name)
- call it(defs,'NLCorrelation','[NL] Correlation ("IPA/HARTREE/TDDFT/LRC/LRW/JGM/SEX") ',NL_correlation)
+ call it(defs,'NLCorrelation','[NL] Correlation ("IPA/HARTREE/TDDFT/LRC/LRW/JGM/SEX/LSEX/LHF") ',NL_correlation)
  call it(defs,'NLLrcAlpha',   '[NL] Long Range Correction',NL_LRC_alpha)
  call it(defs,'NLDamping',    '[NL] Damping (or dephasing)',NL_damping,unit=E_unit)
  call it(defs,'NLEnRange',    '[NL] Energy range (for loop on frequencies NLEnSteps/=0',NL_er,E_unit)
@@ -521,6 +524,7 @@ subroutine INIT_load(defs,en,q,k,X,Xw,Dip)
  call it(defs,'NLrotaxis',    '[NL] Rotation axis (for the loop on angles NLAngSteps/=0)',NL_rot_axis)
  call it(defs,'NLEnSteps',    '[NL] Energy steps for the loop on frequencies',n_frequencies)
  call it('f',defs,'UseDipoles','[NL] Use Covariant Dipoles (just for test purpose)',verb_level=V_nl_optics)
+ call it('f',defs,'TestOSCLL','[NL] Test Oscillators vs Full Collisions [for testing purposes]',verb_level=V_nl_optics)
  call it('f',defs,'FrSndOrd','[NL] Force second order in Covariant Dipoles',verb_level=V_nl_optics)
  call it('f',defs,'NoComprCOLL','[NL] Load collisions in double-precision and do not remove small elements (default false)',&
 &  verb_level=V_nl_optics)
@@ -782,6 +786,7 @@ subroutine CPU_structure_load()
        if (i_s==8) possible_fields='(w,k) '      ! NL
        if (i_s==9) possible_fields='(k) '        ! BZ
        if (i_s==10)possible_fields='(q,k,b) '    ! PH_SE
+       if (i_s==11)possible_fields='(k) '        ! OSCLL
        !
        if (len_trim(CPU_structure(i_s)%Short_Description)==0) cycle
        name=trim(CPU_structure(i_s)%Short_Description)//"_CPU"
diff --git a/src/io/.objects b/src/io/.objects
index 6962cdbb0c..ca96eba112 100644
--- a/src/io/.objects
+++ b/src/io/.objects
@@ -15,6 +15,10 @@ ELPH_objects = variables_ELPH.o io_ELPH.o io_gFsq.o
 #if defined _PHEL 
 PHEL_objects = io_PH.o
 #endif
+COLL_objects =
+#if defined _SC || _RT 
+COLL_objects = io_OSCLL.o 
+#endif
 objs = ver_is_gt_or_eq.o \
        io_Vnl.o       io_E_SOC_map.o \
        io_RIM.o io_RIM_W.o    io_GROT.o     $(ELPH_objects) $(PHEL_objects) \
@@ -26,4 +30,4 @@ objs = ver_is_gt_or_eq.o \
        io_header.o io_Double_Grid.o \
        io_COL_CUT.o    io_DB1_selective_scan.o variables_X.o variables_DIPOLES.o \
        io_out_of_date.o \
-       $(SC_objects) $(RT_objects) $(NL_objects) 
+       $(SC_objects) $(RT_objects) $(NL_objects) $(COLL_objects)  
diff --git a/src/io/DOUBLE_project.dep b/src/io/DOUBLE_project.dep
index 50ca185728..6600258148 100644
--- a/src/io/DOUBLE_project.dep
+++ b/src/io/DOUBLE_project.dep
@@ -17,6 +17,7 @@
  io_MPA.o
  io_NL.o
  io_NLCC_pwscf.o
+ io_OSCLL.o
  io_PH.o
  io_QINDX.o
  io_QP_and_GF.o
diff --git a/src/io/RT_project.dep b/src/io/RT_project.dep
index a4625c321c..544ef7e355 100644
--- a/src/io/RT_project.dep
+++ b/src/io/RT_project.dep
@@ -1,5 +1,6 @@
  io_DB1.o
  io_KB_real_space.o
+ io_OSCLL.o
  io_RT_components.o
  io_RT_components_G_lesser.o
  io_RT_components_OBS.o
diff --git a/src/io/SC_project.dep b/src/io/SC_project.dep
index 863d79b747..09f8e8df32 100644
--- a/src/io/SC_project.dep
+++ b/src/io/SC_project.dep
@@ -1,5 +1,6 @@
  io_DB1.o
  io_KB_real_space.o
+ io_OSCLL.o
  io_SC_components.o
  load_SC_components.o
  variables_SC.o
diff --git a/src/io/io_BSS_diago.F b/src/io/io_BSS_diago.F
index 05e263801c..600244a95e 100644
--- a/src/io/io_BSS_diago.F
+++ b/src/io/io_BSS_diago.F
@@ -6,7 +6,7 @@
 ! Authors (see AUTHORS file for details): AM
 !
 integer function io_BSS_diago(iq,i_BS_mat,ID,X_static,bsE,bsRl,bsRr,BsE_corr,&
-&                             bsL_magn,bsR_magn,bsR_kerr,bsR_pl)
+&                             bsL_magn,bsR_magn,bsL_kerr,bsR_kerr,bsR_dich,bsR_pl)
  !
  use pars,          ONLY:SP,schlen,IP
  use stderr,        ONLY:intc
@@ -25,7 +25,7 @@ integer function io_BSS_diago(iq,i_BS_mat,ID,X_static,bsE,bsRl,bsRr,BsE_corr,&
  integer                        :: iq,ID,i_BS_mat
  complex(SP)        , optional  :: bsE(:)
  real(SP)   ,pointer, optional  :: BsE_corr(:,:),bsR_pl(:,:)
- complex(SP),pointer, optional  :: bsR_kerr(:),bsL_magn(:,:),bsR_magn(:,:),bsRl(:),bsRr(:)
+ complex(SP),pointer, optional  :: bsL_kerr(:),bsR_kerr(:),bsR_dich(:,:),bsL_magn(:,:),bsR_magn(:,:),bsRl(:),bsRr(:)
  !
  ! Work Space
  !
@@ -127,6 +127,13 @@ integer function io_BSS_diago(iq,i_BS_mat,ID,X_static,bsE,bsRl,bsRr,BsE_corr,&
      endif
    endif
    !
+   if(present(bsR_dich)) then
+     if(associated(bsR_dich)) then
+       call io_bulk(ID,'BS_DICH_Residuals',VAR_SZ=(/2,BSS_n_eig,2/))
+       call io_bulk(ID,C2=bsR_dich)
+     endif
+   endif
+   !
    if(present(bsR_pl)) then
      if(associated(bsR_pl)) then
        call io_bulk(ID,'BS_PL_Residuals',VAR_SZ=(/2,BSS_n_eig/))
diff --git a/src/io/io_OSCLL.F b/src/io/io_OSCLL.F
new file mode 100644
index 0000000000..1907ba342f
--- /dev/null
+++ b/src/io/io_OSCLL.F
@@ -0,0 +1,141 @@
+!
+!        Copyright (C) 2000-2022 the YAMBO team
+!              http://www.yambo-code.org
+!
+! Authors (see AUTHORS file for details): CA
+! 
+! This file is distributed under the terms of the GNU 
+! General Public License. You can redistribute it and/or 
+! modify it under the terms of the GNU General Public 
+! License as published by the Free Software Foundation; 
+! either version 2, or (at your option) any later version.
+!
+! This program is distributed in the hope that it will 
+! be useful, but WITHOUT ANY WARRANTY; without even the 
+! implied warranty of MERCHANTABILITY or FITNESS FOR A 
+! PARTICULAR PURPOSE.  See the GNU General Public License 
+! for more details.
+!
+! You should have received a copy of the GNU General Public 
+! License along with this program; if not, write to the Free 
+! Software Foundation, Inc., 59 Temple Place - Suite 330,Boston, 
+! MA 02111-1307, USA or visit http://www.gnu.org/copyleft/gpl.txt.
+!
+integer function io_OSCLL(q,ID)
+ !
+ use pars,          ONLY:SP,schlen,IP_YIO
+ use electrons,     ONLY:levels
+ use matrix_operate,ONLY:mat_c2r,mat_r2c
+ use R_lattice,     ONLY:nXkibz,bz_samp
+ use QP_m,          ONLY:QP_nk
+ use IO_int,        ONLY:io_connect,io_disconnect,io_header,io_fragment, &
+&                        def_variable_bulk,io_variable_bulk,io_variable_elemental,def_variable_elemental    
+ use IO_m,          ONLY:io_sec,frag_DIP,read_is_on,write_is_on,io_extension,&
+&                        RD_CL_IF_END,RD_CL,io_action,IO_NOT_ALLOWED,io_DIP, &
+&                        io_status,io_PAR_cpu
+ use collision_ext, ONLY:COLL_bands,OSCLL,OSCLL_k,ng_oscll
+ use parallel_m,    ONLY:PAR_Xk_ibz_index,PAR_Xk_nibz
+#if defined _TIMING
+ use timing_m,      ONLY:timing
+#endif
+ !
+#include<memory.h>
+ !
+ integer     ::ID
+ type(bz_samp), intent(in) :: q
+ !
+ ! Work Space
+ !
+ integer :: ik,ik_mem,i_fragment,COLL_nbands,i_sp_pol,iqbz,ig,ib,ID_frag
+ character(schlen)    :: VAR_name
+ real(SP),allocatable :: disk_data(:,:,:)
+ !
+#if defined _TIMING
+ call timing('io_OSCLL',OPR='start')
+#endif
+ !
+ ID_frag=0
+ io_OSCLL=0
+ !
+ io_extension(ID)='OSCLL'
+ io_OSCLL=io_connect(desc=trim(io_extension(ID)),type=2,ID=ID,FORCE_READ_MODE=(.not.any((/io_sec(ID,:)==1/))))
+ !
+ if (io_OSCLL/=0) goto 1
+ !
+ if (any((/io_sec(ID,:)==1/))) then
+   !
+   io_OSCLL=io_header(ID,R_LATT=.true.,WF=.true.,IMPOSE_SN=.true.)
+   if (io_OSCLL/=0) goto 1
+   !
+   ! Variable for the Oscillators
+   !
+   call def_variable_elemental(ID,"ng_oscll",1,IP_YIO,0)
+   call io_variable_elemental(ID,VAR="RL vectors in oscillators",I0=ng_oscll,CHECK=.true.,OP=(/"=="/))
+   !
+   call def_variable_elemental(ID,"COLL_bands",2,IP_YIO,0)
+   call io_variable_elemental(ID,VAR="Oscillators bands range",I1=COLL_bands,CHECK=.true.,OP=(/"==","=="/))
+   !
+   io_OSCLL=io_status(ID)
+   if (io_OSCLL/=0) goto 1
+   !
+ endif
+ !
+ i_fragment=maxval(io_sec(ID,:))-1
+ !
+ if(i_fragment<=nXkibz) then
+   i_sp_pol=1
+   ik=i_fragment
+ else
+   i_sp_pol=2
+   ik=i_fragment-nXkibz
+ endif
+ !
+ if (ik==0) goto 1
+ !
+ ik_mem=PAR_Xk_ibz_index(ik)
+ !
+ COLL_nbands=COLL_bands(2)-COLL_bands(1)+1
+ YAMBO_ALLOC(disk_data,(COLL_nbands,q%nbz,2))
+ !
+ if (read_is_on(ID)) then
+   if(.not.allocated(OSCLL)) then
+     YAMBO_ALLOC(OSCLL,(COLL_bands(1):COLL_bands(2),COLL_bands(1):COLL_bands(2),PAR_Xk_nibz,q%nbz,ng_oscll))
+   endif
+ endif
+ !
+ ! Fragmentation
+ !
+ call io_fragment(ID,ID_frag,i_fragment=i_fragment)
+ !
+ ! Manage RD_CL_IF_END
+ !
+ if (io_action(ID)==RD_CL_IF_END.and.ik==nXkibz) io_action(ID)=RD_CL
+ !
+ do ib=COLL_bands(1),COLL_bands(2)
+   do ig=1,ng_oscll
+     !
+     if (write_is_on(ID)) call mat_c2r(OSCLL_k(ib,:,:,ig),disk_data)
+     !
+     write (VAR_name,'(3(a,i4.4))') 'OSCLL_',ik,'_ib_',ib,'_ig_',ig
+     call def_variable_bulk(ID_frag,trim(VAR_name),1,shape(disk_data),SP)
+     call io_variable_bulk(ID_frag,1,R3=disk_data)
+     !
+     io_OSCLL=io_status(ID)
+     if (io_OSCLL/=0) goto 1
+     !
+     if (read_is_on(ID)) call mat_r2c(disk_data,OSCLL(ib,:,ik_mem,:,ig))
+     !
+   enddo
+ enddo
+ !
+ YAMBO_FREE(disk_data)
+ !
+1 call io_fragment_disconnect(ID,ID_frag)
+ !
+ call io_disconnect(ID=ID)
+ !
+#if defined _TIMING
+ call timing('io_OSCLL',OPR='stop')
+#endif
+ !
+end function
diff --git a/src/io/io_X.F b/src/io/io_X.F
index d562fce064..c91c207b72 100644
--- a/src/io/io_X.F
+++ b/src/io/io_X.F
@@ -227,6 +227,7 @@ integer function io_X(X,Xw,ID)
  endif
  !
 1 call io_fragment_disconnect(ID,ID_frag)
+ ! 
  !
  call io_disconnect(ID)
  !
diff --git a/src/io/variables_NL.F b/src/io/variables_NL.F
index ea1d562f7f..2721376091 100644
--- a/src/io/variables_NL.F
+++ b/src/io/variables_NL.F
@@ -11,6 +11,7 @@ integer function variables_NL(ID)
  use units,         ONLY:AUT2FS, HA2EV, AU2KWCMm2
  use nl_optics,     ONLY:NL_damping,l_use_DIPOLES,NL_correlation,NL_er,NL_correlation,n_frequencies,      &
 &                        NL_desc,NL_rot_axis,n_angles,NL_initial_versor
+ use collision_ext, ONLY:ng_oscll
  use QP_m,          ONLY:QP_ng_SH,QP_ng_Sx
  use real_time,     ONLY:Integrator_name,NE_steps,RT_step,l_RT_induced_field,RT_bands,RAD_LifeTime
  use electric,      ONLY:l_force_SndOrd
@@ -86,10 +87,14 @@ integer function variables_NL(ID)
  call def_variable_elemental(ID,"QP_ng_Sx",1,IP_YIO,0)
  call io_variable_elemental(ID,VAR="[NL] Number of G in EXCHANGE(DFT)",I0=QP_ng_Sx,CHECK=.true.,OP=(/"=="/), &
      &         DESCRIPTOR=NL_desc)
-
+ call def_variable_elemental(ID,"ng_OSCLL",1,IP_YIO,0)
+ call io_variable_elemental(ID,VAR="[NL] Number of G in LSEX",I0=ng_oscll,CHECK=.true.,OP=(/"=="/), &
+     &         DESCRIPTOR=NL_desc)
  call def_variable_elemental(ID,"RAD_LifeTime",1,SP,0)
  call io_variable_elemental(ID,VAR="[NL] Radiative Life-Time",R0=RAD_LifeTime,CHECK=.true.,OP=(/"=="/),DESCRIPTOR=NL_desc)
  !
+ ! INTEGRATOR
+ !
  call def_variable_elemental(ID,"Integrator",1,CR_YIO,0)
  call io_variable_elemental(ID,CH0=Integrator_name,VAR=' [RT] Integrator           ',CHECK=.true.,OP=(/"=="/), &
 &        DESCRIPTOR=NL_desc)
diff --git a/src/io_parallel/io_BS_PAR_free.F b/src/io_parallel/io_BS_PAR_free.F
index c1e55aaf22..bc89103e75 100644
--- a/src/io_parallel/io_BS_PAR_free.F
+++ b/src/io_parallel/io_BS_PAR_free.F
@@ -10,6 +10,7 @@ subroutine io_BS_PAR_free(ID_head,ID,mode,l_kernel_complete)
  ! close BSE netcdf parallel file
  !
  use pars,          ONLY:SP
+ use parallel_m,    ONLY:master_cpu
  use parallel_int,  ONLY:PP_wait
  use BS,            ONLY:l_BSE_kernel_complete,BS_K_has_been_calculated_loaded,BS_K_cutoff
  use IO_m,          ONLY:read_is_on,write_is_on,io_BS_K
@@ -31,7 +32,7 @@ subroutine io_BS_PAR_free(ID_head,ID,mode,l_kernel_complete)
      BS_K_has_been_calculated_loaded=l_kernel_complete
      l_BSE_kernel_complete          =l_kernel_complete.and.(io_BS_K.or.BS_K_cutoff>0._SP)
    endif
-   if(write_is_on(ID_head).or.read_is_on(ID_head)) then
+   if((write_is_on(ID_head).and.master_cpu).or.read_is_on(ID_head)) then
      io_BS=variables_BS_more(ID_head,'end')
      call io_disconnect(ID_head)
    endif
diff --git a/src/io_parallel/io_BS_header.F b/src/io_parallel/io_BS_header.F
index 1af17823f2..b1d97b6782 100644
--- a/src/io_parallel/io_BS_header.F
+++ b/src/io_parallel/io_BS_header.F
@@ -14,6 +14,7 @@ integer function io_BS_header(iq,X,ID,mode)
  use pars,          ONLY:schlen,lchlen
  use stderr,        ONLY:intc
  use X_m,           ONLY:X_t
+ use parallel_m,    ONLY:master_cpu
  use BS,            ONLY:BS_K_cutoff_done,l_BSE_restart,l_BSE_kernel_complete
  use BS_solvers,    ONLY:variables_BS,BSS_desc
  use IO_m,          ONLY:frag_BS_K,write_is_on,read_is_on,io_mode,io_BS_K,DUMP
@@ -57,6 +58,8 @@ integer function io_BS_header(iq,X,ID,mode)
    !
  endif
  !
+ if(.not. (read_is_on(ID) .or. (write_is_on(ID).and.master_cpu)) ) return
+ !
  db_name='BS_head_Q'//trim(intc(iq))
  !
  io_BS_header=io_connect(desc=trim(db_name),type=2,ID=ID)
@@ -64,10 +67,6 @@ integer function io_BS_header(iq,X,ID,mode)
  if (trim(mode)=="connect") return
  !
  ! Check if the file already contains data and it is consistent
- ! DS: warning. This is potentially dangerous since all MPI tasts are all writing
- !              both the header and the variables
- !              I cannot put here if(master_cpu) because I need all MPI_tasks to
- !              go through the definition of the variables (see similar issue in io_X.F)
  !
  io_BS_header=io_header(ID,QPTS=.true.,R_LATT=.true.,WF=.true.,&
 &                       IMPOSE_SN=.true.,XC_KIND="K_WF force Xs",CUTOFF=.true.,FRAG=frag_BS_K)
diff --git a/src/linear_algebra/MATRIX_slepc.F b/src/linear_algebra/MATRIX_slepc.F
index 7335cb4629..e61ba3f0fe 100644
--- a/src/linear_algebra/MATRIX_slepc.F
+++ b/src/linear_algebra/MATRIX_slepc.F
@@ -27,7 +27,7 @@ subroutine MATRIX_slepc(M_slepc,l_target_energy,n_eig,V_right,V_left,E_real,E_cm
  use stderr,        ONLY : intc
  use BS_solvers,    ONLY : BSS_slepc_extraction,BSS_slepc_ncv,BSS_slepc_maxit,&
  &                         BSS_slepc_tol,BSS_slepc_target_E,BSS_slepc_precondition,&
- &                         BSS_slepc_approach,BSS_slepc_mpd,BSS_slepc_matrix_format
+ &                         BSS_slepc_approach,BSS_slepc_matrix_format,BSS_slepc_mpd
  !
  use petscsys
  use petscmat
diff --git a/src/modules/SET_defaults.F b/src/modules/SET_defaults.F
index 204ecc41fc..77191bc0da 100644
--- a/src/modules/SET_defaults.F
+++ b/src/modules/SET_defaults.F
@@ -47,14 +47,14 @@ subroutine SET_defaults(INSTR,IND,OD,COM_DIR)
  use wave_func,  ONLY:wf_ng,wf_norm_test,wf_nb_io,wf_nb_io_groups,WF,WF_buffer,WF_buffered_IO
  use FFT_m,      ONLY:fft_dim_loaded,fft_size,fft_dim,fft_multiplier
  use IO_m,       ONLY:io_reset,max_io_units,serial_number,frag_WF
- use BS_solvers, ONLY:BSS_mode,BSS_n_freqs,BSS_er,BSS_dr,&
-&                     BSS_q0,Haydock_threshold,Haydock_iterIO,Haydock_iterMAX,BSS_uses_DbGd,BSS_Wd,&
+ use BS_solvers, ONLY:BSS_mode,BSS_n_freqs,BSS_er,BSS_dr,BSS_P_dir,BSS_E_dir,BSS_Q_dir,&
+&                     Haydock_threshold,Haydock_iterIO,Haydock_iterMAX,BSS_uses_DbGd,BSS_Wd,&
 &                     BSS_damp_reference,BSS_Vnl_included,BSS_uses_GreenF,BSS_inversion_mode,&
 &                     BSS_perturbative_width,K_INV_EPS,K_INV_PL,K_INV_PI_PH,BSS_desc
  use descriptors,ONLY:IO_desc_reset
 #if defined _SLEPC && !defined _NL
  use BS_solvers, ONLY:BSS_slepc_extraction,BSS_slepc_ncv,BSS_slepc_tol,BSS_slepc_target_E,BSS_slepc_maxit,&
- &                    BSS_slepc_precondition,BSS_slepc_approach,BSS_slepc_mpd,BSS_slepc_matrix_format
+ &                    BSS_slepc_precondition,BSS_slepc_approach,BSS_slepc_matrix_format,BSS_slepc_mpd
 #endif
  use BS,         ONLY:BS_n_g_W,BS_eh_en,BS_identifier,BS_q,BS_eh_win,MAX_BSK_LIN_size,&
 &                     BS_K_dim,BS_not_const_eh_f,BSK_mode,l_BSE_kernel_complete,&
@@ -449,7 +449,9 @@ subroutine SET_defaults(INSTR,IND,OD,COM_DIR)
  BSS_inversion_mode='pf'
  BSS_er=(/0._SP,10._SP/)/HA2EV
  BSS_dr=0.1_SP/HA2EV
- BSS_q0=(/1._SP,0._SP,0._SP/)
+ BSS_E_dir=(/1._SP,0._SP,0._SP/)
+ BSS_P_dir=(/0._SP,1._SP,0._SP/)
+ BSS_Q_dir=(/0._SP,0._SP,1._SP/)
  BSS_uses_DbGd=.FALSE.
  BSS_damp_reference=0._SP
  BS_K_is_ALDA=.FALSE.
diff --git a/src/modules/mod_BS.F b/src/modules/mod_BS.F
index e6e82f929d..2812f48647 100644
--- a/src/modules/mod_BS.F
+++ b/src/modules/mod_BS.F
@@ -48,11 +48,13 @@ module BS
  logical :: l_BS_jdos
  logical :: l_BS_abs
  logical :: l_BS_kerr
+ logical :: l_BS_kerr_asymm
  logical :: l_BS_photolum
  logical :: l_BS_magnons
  logical :: l_BS_dichroism
  logical :: l_BS_optics
  logical :: l_BS_esort
+ logical :: l_BS_esort_indx
  !
  ! Dimensions
  !=============
@@ -278,69 +280,9 @@ integer function T_group(I)
      if (I>=BS_T_grp_1st_el(BS_nT_grps-1)) T_group=BS_nT_grps
    end function
    !
-   subroutine BS_oscillators_alloc(EXCHANGE,iG,CORRELATION,iB)
-     !
-     use pars,       ONLY:IP
-     use stderr,     ONLY:intc
-     !
-     logical, intent(in) :: EXCHANGE,CORRELATION
-     integer, intent(in) :: iG,iB
-     !
-     if (EXCHANGE) then
-       if (BS_T_grp(iG)%size==0) return
-       YAMBO_ALLOC(BS_T_grp(iG)%O_x,(BS_n_g_exch,BS_T_grp(iG)%size))
-#ifdef _CUDA
-       allocate(BS_T_grp(iG)%O_x_d,mold=BS_T_grp(iG)%O_x)
-#endif
-
-     endif
-     !
-     if (CORRELATION) then
-       if (BS_blk(iB)%N_oscillators==0) return
-       YAMBO_ALLOC(BS_blk(iB)%O_c,(O_ng,BS_blk(iB)%N_oscillators))
-#ifdef _CUDA
-       allocate(BS_blk(iB)%O_c_d,mold=BS_blk(iB)%O_c)
-#endif
-     endif
-     !
-   end subroutine
-   !
-   subroutine BS_oscillators_free(iG_ref,iB_ref)
-     use stderr,     ONLY:intc
-     integer, intent(in) :: iG_ref,iB_ref
-     integer :: iG,iB,ik_loop,ip_loop,ik_now,ip_now
-     do iG=iG_ref,1,-1
-       YAMBO_FREE(BS_T_grp(iG)%O_x)
-#ifdef _CUDA
-       if (allocated(BS_T_grp(iG)%O_x_d)) deallocate(BS_T_grp(iG)%O_x_d)
-#endif
-     enddo
-     if(iB_ref==0) return
-     ik_now=BS_blk(iB_ref)%ik
-     ip_now=BS_blk(iB_ref)%ip
-     if(iB_ref==n_BS_blks) then
-       ik_now=0
-       ip_now=0
-     endif
-     do iB=iB_ref,1,-1
-       ik_loop=BS_blk(iB)%ik
-       ip_loop=BS_blk(iB)%ip
-       if ( ik_now==ik_loop .and. ip_now==ip_loop .and. .not.l_BSE_minimize_memory ) cycle
-       if (.not.allocated(BS_blk(iB)%O_c)) exit
-       YAMBO_FREE(BS_blk(iB)%O_c)
-       YAMBO_FREE(BS_blk(iB)%O_table)
-       YAMBO_FREE(BS_blk(iB)%kp_table)
-#ifdef _CUDA
-       if (allocated(BS_blk(iB)%O_c_d)) deallocate(BS_blk(iB)%O_c_d)
-#endif
-     enddo
-     !
-   end subroutine
-   !
    subroutine BS_Blocks_and_Transitions_alloc(E,iT,dom_k_T_group)
      !
      use pars,       ONLY:IP,cZERO
-     use stderr,     ONLY:intc
      use electrons,  ONLY:levels,n_sp_pol
      integer,      intent(in) :: iT,dom_k_T_group
      type(levels), intent(in) :: E
@@ -388,7 +330,6 @@ subroutine BS_blks_free(i_BS_mat)
    end subroutine
    !
    subroutine BS_Blocks_and_Transitions_free()
-     use stderr,     ONLY:intc
      use parallel_m, ONLY:PAR_BS_T_grps_index
      integer :: iT
      !
@@ -423,7 +364,7 @@ subroutine BS_Blocks_and_Transitions_free()
      !
      if(allocated(BS_blk)) deallocate(BS_blk)
      !
-   end subroutine
+   end subroutine BS_Blocks_and_Transitions_free
    !
    function BS_Block_size(i_block)
      !
diff --git a/src/modules/mod_BS_solvers.F b/src/modules/mod_BS_solvers.F
index 9e1a973f63..306f3ea043 100644
--- a/src/modules/mod_BS_solvers.F
+++ b/src/modules/mod_BS_solvers.F
@@ -59,6 +59,9 @@ module BS_solvers
  real(SP) :: BSS_dr(2)
  real(SP) :: BSS_damp_reference
  real(SP) :: BSS_q0(3)
+ real(SP) :: BSS_E_dir(3)         ! Electric field
+ real(SP) :: BSS_P_dir(3)         ! Induced polarization
+ real(SP) :: BSS_Q_dir(3)         ! Field propagation direction
  logical  :: BSS_uses_DbGd
  logical  :: BSS_Vnl_included
  logical  :: BSS_uses_GreenF
@@ -152,7 +155,7 @@ integer function variables_BS(ID,iq,local_desc,CLOSE_the_menu,X)
    end function
    !
    integer function io_BSS_diago(iq,i_BS_mat,ID,X_static,bsE,bsRl,BsRr,BsE_corr,&
-   &                                    bsL_magn,bsR_magn,bsR_kerr,bsR_pl)
+   &                                    bsL_magn,bsR_magn,bsL_kerr,bsR_kerr,bsR_dich,bsR_pl)
      use pars,          ONLY:SP
      use X_m,           ONLY:X_t
      implicit none
@@ -160,7 +163,7 @@ integer function io_BSS_diago(iq,i_BS_mat,ID,X_static,bsE,bsRl,BsRr,BsE_corr,&
      integer                      :: iq,ID,i_BS_mat
      complex(SP)        ,optional :: bsE(:)
      real(SP),   pointer,optional :: BsE_corr(:,:),bsR_pl(:,:)
-     complex(SP),pointer,optional :: bsR_kerr(:),bsL_magn(:,:),bsR_magn(:,:),bsRl(:),bsRr(:)
+     complex(SP),pointer,optional :: bsL_kerr(:),bsR_kerr(:),bsR_dich(:,:),bsL_magn(:,:),bsR_magn(:,:),bsRl(:),bsRr(:)
    end function
    !
    integer function io_BSS_Haydock(ID,iq,it,reached_treshold,mode,Af,Bf,Cf,Vnm1,Vn,Vnp1)
diff --git a/src/modules/mod_R_lattice.F b/src/modules/mod_R_lattice.F
index 4b3d23e89a..a9e9d8db86 100644
--- a/src/modules/mod_R_lattice.F
+++ b/src/modules/mod_R_lattice.F
@@ -121,6 +121,7 @@ module R_lattice
  integer              :: n_g_shells     ! Number of G-shells
  integer              :: ng_vec         ! Number of G-vectors
  integer              :: ng_closed      ! Number of G closed
+ integer              :: G_m_G_maxval   ! Maximum value of G_m_G
  integer ,allocatable :: ng_in_shell(:) ! Number of G in each shell
  integer ,allocatable :: g_rot(:,:)
  integer ,allocatable :: G_m_G(:,:)
diff --git a/src/modules/mod_X.F b/src/modules/mod_X.F
index f6a116dba2..74e771a73e 100644
--- a/src/modules/mod_X.F
+++ b/src/modules/mod_X.F
@@ -65,6 +65,7 @@ module X_m
  ! Absorption & Polarizability
  !
  integer                  :: N_BS_E_sorted=0
+ integer,     allocatable :: BS_E_sorted_indx(:,:,:)
  complex(SP), allocatable :: BS_E_sorted(:,:,:)
  complex(SP), allocatable :: Resp_ii(:,:)
  complex(SP), allocatable :: Resp_ij(:,:)
@@ -160,7 +161,7 @@ module X_m
    integer               :: N_messages
    character(schlen)     :: messages(N_MAX_columns)
  end type
- integer, parameter     :: N_X_obs=15
+ integer, parameter     :: N_X_obs=16
  type(X_obs_t)          :: X_obs(N_X_obs) 
  !
  interface
@@ -176,7 +177,7 @@ subroutine X_OUTPUT_messages(iq,ig,Vnl,GF,ordering,Q_plus_G,MORE)
      character(*), optional :: MORE
    end subroutine
    !
-   subroutine X_OUTPUT_driver(WHAT,keys,solver,IQ,IG,DESC,data)
+   subroutine X_OUTPUT_driver(WHAT,keys,solver,IQ,IG,DESC,data,indexes)
      use pars,           ONLY:SP
      use frequency,      ONLY:w_samp
      use descriptors,    ONLY:IO_desc
@@ -186,6 +187,7 @@ subroutine X_OUTPUT_driver(WHAT,keys,solver,IQ,IG,DESC,data)
      integer,      optional:: IQ,IG
      type(IO_desc),optional:: DESC
      real(SP),     optional:: data(:)
+     integer,      optional:: indexes(:)
    end subroutine
    !
    subroutine X_Double_Grid_setup(iq,k,k_FGbz,nTrans_percent,Radius)
diff --git a/src/modules/mod_collision_ext.F b/src/modules/mod_collision_ext.F
index 79558a71df..0934280332 100644
--- a/src/modules/mod_collision_ext.F
+++ b/src/modules/mod_collision_ext.F
@@ -92,6 +92,13 @@ module collision_ext
  type(COLLISIONS_element), allocatable :: HXC_COLL_element(:)
  type(COLLISIONS_element), allocatable :: P_COLL_element(:)
  !
+ ! On the fly long-range SEX (LSEX) collisions
+ ! ====================================
+ integer :: ng_oscll                  ! number of G-vectors in the Oscillators
+ complex(SP), allocatable :: W(:,:,:) ! screened interaction
+ complex(SP), allocatable :: OSCLL(:,:,:,:,:) ! Oscillators
+ complex(SP), allocatable :: OSCLL_k(:,:,:,:) ! Oscillators in k, just for the evaluation
+ !
  contains
    !
    subroutine COLLISIONS_naming( H_SE_kind , what )
diff --git a/src/modules/mod_global_XC.F b/src/modules/mod_global_XC.F
index afde58dd0c..8bb40f39e2 100644
--- a/src/modules/mod_global_XC.F
+++ b/src/modules/mod_global_XC.F
@@ -101,6 +101,8 @@ module global_XC
    H_SE_SRPA_SLT      = 325, &  ! OEP-COHSEX (SLT apprx)
    H_SE_COH           = 326, &  ! Coulomb-Hole 
    H_SE_SEX           = 327, &  ! Screened-eXchange
+   H_SE_LSEX          = 328, &  ! Long range-screened-eXchange
+   H_SE_LHF           = 329, &  ! Long range-eXchange
    H_SE_COHSEX        = H_SE_SEX+H_SE_COH  ! Coulomb-Hole + Screened-eXchange
  !
  ! Phonons
diff --git a/src/modules/mod_interfaces.F b/src/modules/mod_interfaces.F
index 6c240a30f9..131d6c7d58 100644
--- a/src/modules/mod_interfaces.F
+++ b/src/modules/mod_interfaces.F
@@ -171,8 +171,8 @@ integer function eval_G_minus_G(iG,iGo,force_recompute,COMM)
      type(yMPI_comm), optional :: COMM
    end function
    !
-   subroutine WF_load(WF,iG_max,iGo_max,bands_to_load,kpts_to_load,&
-&                     sp_pol_to_load,space,title,impose_free_and_alloc,&
+   subroutine WF_load(WF,iG_max,iGo_max,bands_to_load,kpts_to_load,sp_pol_to_load,&
+&                     k_extrema_only,space,title,impose_free_and_alloc,&
 &                     force_WFo,keep_states_to_load,quiet)
      use wave_func,       ONLY:WAVEs
      integer :: iG_max,iGo_max,bands_to_load(2),kpts_to_load(2)
@@ -180,16 +180,18 @@ subroutine WF_load(WF,iG_max,iGo_max,bands_to_load,kpts_to_load,&
      integer,     optional :: sp_pol_to_load(2)
      character(*),optional :: space
      character(*),optional :: title
+     logical     ,optional :: k_extrema_only
      logical     ,optional :: impose_free_and_alloc
      logical     ,optional :: force_WFo
      logical     ,optional :: keep_states_to_load
      logical,     optional, intent(in) :: quiet
    end subroutine
    !
-   subroutine WF_free(WF,keep_fft)
+   subroutine WF_free(WF,keep_fft,keep_states_to_load)
      use wave_func,       ONLY:WAVEs
      type(WAVEs)           :: WF
      logical     ,optional :: keep_fft
+     logical     ,optional :: keep_states_to_load
    end subroutine
    !
    subroutine OCCUPATIONS_Gaps(E,E_g_dir,E_g_ind,N_f,N_m,I_dir,E_k_dir,E_k_ind)
@@ -205,21 +207,44 @@ subroutine OCCUPATIONS_Gaps(E,E_g_dir,E_g_ind,N_f,N_m,I_dir,E_k_dir,E_k_ind)
      integer , optional :: E_k_ind(n_sp_pol,2)  ! k->k'   of the indirect gap
    end subroutine 
    !
-   subroutine K_diago_response_functions(iq,i_BS_mat,W,BS_E,BS_R_left,BS_R_right,BS_E_SOC_corr,&
-     &                                  BS_R_left_magn,BS_R_right_magn,BS_R_right_kerr,BS_R_PL)
+   subroutine K_diago_response_functions(iq,i_BS_mat,W,BS_E,BS_R_left_abs,BS_R_right_abs,BS_E_SOC_corr,&
+     &                                  BS_R_left_magn,BS_R_right_magn,BS_R_left_kerr,BS_R_right_kerr,BS_R_right_dich,BS_R_PL)
      use BS_solvers,    ONLY:BSS_n_eig
      use pars,          ONLY:SP
      use frequency,     ONLY:w_samp
      type(w_samp),intent(in)  :: W
      integer,     intent(in)  :: iq,i_BS_mat
      complex(SP), intent(in)  :: BS_E(BSS_n_eig)
-     complex(SP), pointer, intent(in) :: BS_R_left(:),BS_R_right(:)
+     complex(SP), pointer, intent(in) :: BS_R_left_abs(:),BS_R_right_abs(:)
      real(SP),    pointer, intent(in) :: BS_E_SOC_corr(:,:)
      complex(SP), pointer, intent(in) :: BS_R_left_magn(:,:),BS_R_right_magn(:,:)
-     complex(SP), pointer, intent(in) :: BS_R_right_kerr(:)
+     complex(SP), pointer, intent(in) :: BS_R_left_kerr(:),BS_R_right_kerr(:),BS_R_right_dich(:,:)
      real(SP),    pointer, intent(in) :: BS_R_PL(:,:)
    end subroutine
    !
+   subroutine K_diago_right_residuals(mode,i_BS_mat,BS_E,BS_R_right,BS_V_right)
+     use BS_solvers,    ONLY:BSS_n_eig
+     use pars,          ONLY:SP
+     implicit none
+     character(*),intent(in)  :: mode
+     integer,     intent(in)  :: i_BS_mat
+     complex(SP), intent(in)  :: BS_E(BSS_n_eig)
+     complex(SP), intent(out) :: BS_R_right(BSS_n_eig)
+     complex(SP), intent(in)  :: BS_V_right(:,:)
+   end subroutine K_diago_right_residuals
+   !
+   subroutine K_diago_left_residuals(mode,i_BS_mat,BS_E,BS_R_left,BS_V_left,BS_overlap)
+     use BS_solvers,    ONLY:BSS_n_eig
+     use pars,          ONLY:SP
+     implicit none
+     character(*),intent(in)  :: mode
+     integer,     intent(in)  :: i_BS_mat
+     complex(SP), intent(in)  :: BS_E(BSS_n_eig)
+     complex(SP),target,  intent(out) :: BS_R_left(BSS_n_eig)
+     complex(SP),         intent(in)  :: BS_V_left(:,:)
+     complex(SP),optional,intent(in)  :: BS_overlap(BSS_n_eig,BSS_n_eig)
+   end subroutine K_diago_left_residuals
+   !
    subroutine PL_diago_residual(BS_V_left,BS_V_right,BS_R_PL,K_is_not_hermitian,BS_overlap) 
      use pars,           ONLY:SP
      use BS,             ONLY:BS_H_dim
diff --git a/src/modules/mod_nl_optics.F b/src/modules/mod_nl_optics.F
index fe637aa313..70b7254d0c 100644
--- a/src/modules/mod_nl_optics.F
+++ b/src/modules/mod_nl_optics.F
@@ -64,6 +64,9 @@ module nl_optics
  ! Density matrix
  complex(SP), allocatable :: dG(:,:,:) 
  !
+ ! Sigma sex at equilibrium (for oscillators)
+ complex(SP), allocatable :: Sigma_SEX_EQ(:,:,:,:)
+ !
  ! Integrator Variables
  !
  integer, parameter :: EULER    = 1
@@ -99,8 +102,11 @@ module nl_optics
  integer, parameter :: LRCW   = 6
  integer, parameter :: HF     = 7
  integer, parameter :: SEX    = 8
+ integer, parameter :: LSEX   = 9
+ integer, parameter :: LHF    = 10
  !
  logical :: eval_COLLISIONS
+ logical :: eval_OSCLL
  logical :: eval_dG
  !
  ! Non-linear spectra parameters
@@ -141,6 +147,7 @@ module nl_optics
  !
  logical  :: l_use_DIPOLES   ! Use standard dipoles (valid only for linear response)
  logical  :: l_eval_CURRENT  ! Evaluate current using the commutator v=[H,r] and the IP formulation
+ logical  :: l_test_OSCLL    ! Test collisions built using OSCLLs vs standard Collisions
  !
  ! IO variables
  !
@@ -182,13 +189,14 @@ subroutine NL_alloc(en)
        YAMBO_ALLOC(V_xc_0,(fft_size,n_spin))
      endif          
      !
-     if(eval_dG) then
-       YAMBO_ALLOC(dG,(NL_bands(1):NL_bands(2),NL_bands(1):NL_bands(2),QP_nk))
-     endif
      if(l_use_Hxc_collisions) then
        YAMBO_ALLOC(RT_Vnl_xc,(NL_bands(1):NL_bands(2),NL_bands(1):NL_bands(2),QP_nk,n_sp_pol))
      endif          
      !
+     if(eval_dG) then
+       YAMBO_ALLOC(dG,(NL_bands(1):NL_bands(2),NL_bands(1):NL_bands(2),QP_nk))
+     endif
+     !
      ! Reference energies
      !
      YAMBO_ALLOC(E_full,(1:NL_bands(2),QP_nk,n_sp_pol))
@@ -227,6 +235,10 @@ subroutine NL_alloc_k_distributed(en)
      YAMBO_ALLOC(I_relax,(NL_bands(1):NL_bands(2),NL_bands(1):NL_bands(2),maxval(en%nbf),PAR_Xk_nibz,n_sp_pol))
      YAMBO_ALLOC(Ho_plus_Sigma,(NL_bands(1):NL_bands(2),NL_bands(1):NL_bands(2),PAR_Xk_nibz,n_sp_pol))
      !
+     if(Correlation==LSEX.or.Correlation==LHF) then
+       YAMBO_ALLOC(Sigma_SEX_EQ,(NL_bands(1):NL_bands(2),NL_bands(1):NL_bands(2),n_sp_pol,PAR_Xk_nibz))
+     endif          
+     !
    end subroutine NL_alloc_k_distributed
    !
    subroutine NL_free
@@ -267,6 +279,9 @@ subroutine NL_free
      if(l_use_Hxc_collisions) then
        YAMBO_FREE(RT_Vnl_xc)
      endif
+     if(eval_dG) then
+       YAMBO_FREE(dG)
+     endif
      !
    end subroutine NL_free
    !
diff --git a/src/modules/mod_openmp.F b/src/modules/mod_openmp.F
index 408ce42556..8fcb58f977 100644
--- a/src/modules/mod_openmp.F
+++ b/src/modules/mod_openmp.F
@@ -28,6 +28,7 @@ module openmp
  integer     :: n_threads_K     = 0  
  integer     :: n_threads_NL    = 0
  integer     :: n_threads_BZINDX= 0
+ integer     :: n_threads_OSCLL = 0
  integer     :: n_threads_now   = 0
  logical     :: master_thread   = .TRUE.
  !
@@ -67,14 +68,15 @@ subroutine OPENMP_initialize( )
      !
      n_threads     = 1
      n_threads_limit = 1
-     n_threads_now = 1
-     n_threads_FFT = 0
-     n_threads_X   = 1
-     n_threads_SE  = 1
-     n_threads_RT  = 1
-     n_threads_K   = 1
-     n_threads_DIP = 1
-     n_threads_NL  = 1
+     n_threads_now  = 1
+     n_threads_FFT  = 0
+     n_threads_X    = 1
+     n_threads_SE   = 1
+     n_threads_RT   = 1
+     n_threads_K    = 1
+     n_threads_DIP  = 1
+     n_threads_NL   = 1
+     n_threads_OSCLL= 1
      !
      n_out_threads = 1
      n_inn_threads = 1
diff --git a/src/modules/mod_parallel.F b/src/modules/mod_parallel.F
index 0b85f48b88..70d78fca48 100644
--- a/src/modules/mod_parallel.F
+++ b/src/modules/mod_parallel.F
@@ -75,7 +75,8 @@ module parallel_m
  !
  ! Logicals
  !
- logical            :: l_par_X_T,l_par_X_G,l_par_X_G_finite_q,l_par_DIP,l_par_BZINDX,l_par_SE,l_par_RT,l_par_SC,l_par_NL
+ logical            :: l_par_X_T,l_par_X_G,l_par_X_G_finite_q,l_par_DIP,l_par_BZINDX,l_par_SE, &
+&                      l_par_RT,l_par_SC,l_par_NL,l_par_OSCLL
  !
  ! In a parallel runs, when some loops not involving all CPU's are parallelized it is usefull to keep 
  ! the operations local in the cpu world
@@ -540,7 +541,7 @@ integer function i_INTER_CHAIN(N_father,N_child)
    character(lchlen) function PARALLEL_message(i_s)
      use stderr, ONLY:intc
      use openmp, ONLY:n_threads_X,n_threads_SE,n_threads_RT,n_threads_DIP,n_threads_K, &
-&                     n_threads_NL,n_threads_BZINDX,n_threads
+&                     n_threads_NL,n_threads_BZINDX,n_threads_OSCLL,n_threads
      integer :: i_s
      !
      PARALLEL_message=" "
@@ -562,6 +563,7 @@ character(lchlen) function PARALLEL_message(i_s)
        if (n_threads_RT    >0) PARALLEL_message=trim(PARALLEL_message)//"-"//trim(intc(n_threads_RT))//"(threads@RT)"
        if (n_threads_K     >0) PARALLEL_message=trim(PARALLEL_message)//"-"//trim(intc(n_threads_K))//"(threads@K)"
        if (n_threads_NL    >0) PARALLEL_message=trim(PARALLEL_message)//"-"//trim(intc(n_threads_NL))//"(threads@NL)"
+       if (n_threads_OSCLL >0) PARALLEL_message=trim(PARALLEL_message)//"-"//trim(intc(n_threads_OSCLL))//"(threads@OSCLL)"
      else
        PARALLEL_message=trim(CPU_structure(i_s)%Short_Description)//"(environment)-"//&
 &                       trim(CPU_structure(i_s)%CPU_string)//"(CPUs)-"//&
@@ -608,6 +610,8 @@ subroutine CPU_str_reset()
      CPU_structure(9)%Short_Description="BZINDX"
      CPU_structure(10)%Long_Description="Phonon_Self_Energy"
      CPU_structure(10)%Short_Description="PH_SE"
+     CPU_structure(11)%Long_Description="Oscillators"
+     CPU_structure(11)%Short_Description="OSCLL"
    end subroutine
    !
    subroutine COMM_reset(COMM)
diff --git a/src/modules/mod_wrapper.F b/src/modules/mod_wrapper.F
index 939cf485fa..60997b16db 100644
--- a/src/modules/mod_wrapper.F
+++ b/src/modules/mod_wrapper.F
@@ -13,7 +13,12 @@ module wrapper
  ! TRANSA = 'T' or 't',  op( A ) = A'.
  ! TRANSA = 'C' or 'c',  op( A ) = conjg( A' ).
  !
- use pars,   ONLY:SP,cI
+ use pars,          ONLY:SP,cI
+ !
+ ! Implementation based on C_F_POINTER assumes that variables are contiguous,
+ ! which may not be the case
+ !
+ use iso_c_binding, ONLY:C_LOC,C_F_POINTER
 #ifdef _CUDA
  use cublas
 #endif
@@ -54,9 +59,9 @@ module wrapper
  end interface
  !
  interface Vstar_dot_V
-   module procedure Vstar_dot_V_c1_cpu, Vstar_dot_V_c2_cpu
+   module procedure Vstar_dot_V_c1_cpu,Vstar_dot_V_c2_cpu,V_dot_V_r1_cpu,V_dot_V_r2_cpu
 #ifdef _CUDA
-   module procedure Vstar_dot_V_c1_gpu, Vstar_dot_V_c2_gpu
+   module procedure Vstar_dot_V_c1_gpu,Vstar_dot_V_c2_gpu,V_dot_V_r1_gpu,V_dot_V_r2_gpu
 #endif
  end interface
  !
@@ -68,9 +73,9 @@ module wrapper
  end interface
  !
  interface V_dot_V
-   module procedure V_dot_V_r1_cpu, V_dot_V_c1_cpu, V_dot_V_c2_cpu
+   module procedure V_dot_V_r1_cpu,V_dot_V_r2_cpu,V_dot_V_c1_cpu,V_dot_V_c2_cpu
 #ifdef _CUDA
-   module procedure V_dot_V_c1_gpu
+   module procedure V_dot_V_r1_gpu,V_dot_V_r2_gpu,V_dot_V_c1_gpu,V_dot_V_c2_gpu
 #endif
  end interface
  !
@@ -356,7 +361,7 @@ end subroutine V_copy
    complex(SP) function Vstar_dot_V_c1_cpu(N,CX,CY)
      implicit none
      integer,    intent(in) :: N
-     complex(SP),intent(in) :: CX(*),CY(*)
+     complex(SP),intent(in) :: CX(:),CY(:)
 #if defined _DOUBLE
      complex(SP)::zdotc
      Vstar_dot_V_c1_cpu=ZDOTC(N,CX,1,CY,1)
@@ -369,7 +374,7 @@ end function Vstar_dot_V_c1_cpu
    complex(SP) function Vstar_dot_V_c2_cpu(N,CX,CY)
      implicit none
      integer,    intent(in) :: N
-     complex(SP),intent(in) :: CX(:,:),CY(:,:)
+     complex(SP),target,contiguous,intent(in) :: CX(:,:),CY(:,:)
 #if defined _DOUBLE
      complex(SP)::zdotc
      Vstar_dot_V_c2_cpu=ZDOTC(N,CX,1,CY,1)
@@ -383,7 +388,7 @@ end function Vstar_dot_V_c2_cpu
    complex(SP) function Vstar_dot_V_c1_gpu(N,CX,CY)
      implicit none
      integer,             intent(in) :: N
-     complex(SP), device, intent(in) :: CX(*),CY(*)
+     complex(SP), device, intent(in) :: CX(:),CY(:)
      !
 #if defined _DOUBLE
      Vstar_dot_V_c1_gpu=cublasZdotc(N,CX,1,CY,1)
@@ -395,7 +400,10 @@ end function Vstar_dot_V_c1_gpu
    complex(SP) function Vstar_dot_V_c2_gpu(N,CX,CY)
      implicit none
      integer,             intent(in) :: N
-     complex(SP), device, intent(in) :: CX(:,:),CY(:,:)
+     complex(SP), device, target, intent(in) :: CX(:,:),CY(:,:)
+     !real(SP),pointer    :: CX1D(:),CY1D(:)
+     !call C_F_POINTER(C_LOC(CX),CX1D,(/N/))
+     !call C_F_POINTER(C_LOC(CY),CY1D,(/N/))
      !
 #if defined _DOUBLE
      Vstar_dot_V_c2_gpu=cublasZdotc(N,CX,1,CY,1)
@@ -403,7 +411,6 @@ complex(SP) function Vstar_dot_V_c2_gpu(N,CX,CY)
      Vstar_dot_V_c2_gpu=cublasCdotc(N,CX,1,CY,1)
 #endif
    end function Vstar_dot_V_c2_gpu
-   !
 #endif
    !
    !==============
@@ -445,7 +452,7 @@ end function Vstar_dot_VV_c1_gpu
    real(SP) function V_dot_V_r1_cpu(N,CX,CY)
      implicit none
      integer, intent(in) :: N
-     real(SP),intent(in) :: CX(*),CY(*)
+     real(SP),intent(in) :: CX(:),CY(:)
 #if defined _DOUBLE
      real(SP)::ddot
      V_dot_V_r1_cpu=DDOT(N,CX,1,CY,1)
@@ -454,11 +461,64 @@ real(SP) function V_dot_V_r1_cpu(N,CX,CY)
      V_dot_V_r1_cpu=SDOT(N,CX,1,CY,1)
 #endif
    end function V_dot_V_r1_cpu
+   !
+   real(SP) function V_dot_V_r2_cpu(N,CX,CY)
+     implicit none
+     integer, intent(in) :: N
+     real(SP),target,intent(in) :: CX(:,:),CY(:,:)
+#if defined _DOUBLE
+     real(SP)::ddot
+#else
+     real(SP)::sdot
+#endif
+     !real(SP),pointer    :: CX1D(:),CY1D(:)
+     !call C_F_POINTER(C_LOC(CX),CX1D,(/N/))
+     !call C_F_POINTER(C_LOC(CY),CY1D,(/N/))
+#if defined _DOUBLE
+     V_dot_V_r2_cpu=DDOT(N,CX,1,CY,1)
+#else
+     V_dot_V_r2_cpu=SDOT(N,CX,1,CY,1)
+#endif
+   end function V_dot_V_r2_cpu
+   !
+#ifdef _CUDA
+   real(SP) function V_dot_V_r1_gpu(N,CX,CY)
+     implicit none
+     integer,    intent(in) :: N
+     real(SP), device, intent(in) :: CX(:),CY(:)
+#if defined _DOUBLE
+     real(SP)::ddot
+     V_dot_V_r1_gpu=cublasDDOT(N,CX,1,CY,1)
+#else
+     real(SP)::sdot
+     V_dot_V_r1_gpu=cublasSDOT(N,CX,1,CY,1)
+#endif
+   end function V_dot_V_r1_gpu
+   !
+   real(SP) function V_dot_V_r2_gpu(N,CX,CY)
+     implicit none
+     integer,    intent(in) :: N
+     real(SP), device, target, intent(in) :: CX(:,:),CY(:,:)
+#if defined _DOUBLE
+     real(SP)::ddot
+#else
+     real(SP)::sdot
+#endif
+     !real(SP),pointer    :: CX1D(:),CY1D(:)
+     !call C_F_POINTER(C_LOC(CX),CX1D,(/N/))
+     !call C_F_POINTER(C_LOC(CY),CY1D,(/N/))
+#if defined _DOUBLE
+     V_dot_V_r2_gpu=cublasDDOT(N,CX,1,CY,1)
+#else
+     V_dot_V_r2_gpu=cublasSDOT(N,CX,1,CY,1)
+#endif
+   end function V_dot_V_r2_gpu
+#endif
    !
    complex(SP) function V_dot_V_c1_cpu(N,CX,CY)
      implicit none
      integer,    intent(in) :: N
-     complex(SP),intent(in) :: CX(*),CY(*)
+     complex(SP),intent(in) :: CX(:),CY(:)
 #if defined _DOUBLE
      complex(SP)::zdotu
      V_dot_V_c1_cpu=ZDOTU(N,CX,1,CY,1)
@@ -471,12 +531,18 @@ end function V_dot_V_c1_cpu
    complex(SP) function V_dot_V_c2_cpu(N,CX,CY)
      implicit none
      integer,    intent(in) :: N
-     complex(SP),intent(in) :: CX(:,:),CY(:,:)
+     complex(SP),target,intent(in) :: CX(:,:),CY(:,:)
 #if defined _DOUBLE
      complex(SP)::zdotu
-     V_dot_V_c2_cpu=ZDOTU(N,CX,1,CY,1)
 #else
      complex(SP)::cdotu
+#endif
+     !complex(SP),pointer    :: CX1D(:),CY1D(:)
+     !call C_F_POINTER(C_LOC(CX),CX1D,(/N/))
+     !call C_F_POINTER(C_LOC(CY),CY1D,(/N/))
+#if defined _DOUBLE
+     V_dot_V_c2_cpu=ZDOTU(N,CX,1,CY,1)
+#else
      V_dot_V_c2_cpu=CDOTU(N,CX,1,CY,1)
 #endif
    end function V_dot_V_c2_cpu
@@ -485,7 +551,7 @@ end function V_dot_V_c2_cpu
    complex(SP) function V_dot_V_c1_gpu(N,CX,CY)
      implicit none
      integer,    intent(in) :: N
-     complex(SP), device, intent(in) :: CX(*),CY(*)
+     complex(SP), device, intent(in) :: CX(:),CY(:)
 #if defined _DOUBLE
      complex(SP)::zdotu
      V_dot_V_c1_gpu=cublasZDOTU(N,CX,1,CY,1)
@@ -494,6 +560,25 @@ complex(SP) function V_dot_V_c1_gpu(N,CX,CY)
      V_dot_V_c1_gpu=cublasCDOTU(N,CX,1,CY,1)
 #endif
    end function V_dot_V_c1_gpu
+   !
+   complex(SP) function V_dot_V_c2_gpu(N,CX,CY)
+     implicit none
+     integer,    intent(in) :: N
+     complex(SP), device, target, intent(in) :: CX(:,:),CY(:,:)
+#if defined _DOUBLE
+     complex(SP)::zdotu
+#else
+     complex(SP)::cdotu
+#endif
+     !complex(SP),pointer    :: CX1D(:),CY1D(:)
+     !call C_F_POINTER(C_LOC(CX),CX1D,(/N/))
+     !call C_F_POINTER(C_LOC(CY),CY1D,(/N/))
+#if defined _DOUBLE
+     V_dot_V_c2_gpu=cublasZDOTU(N,CX,1,CY,1)
+#else
+     V_dot_V_c2_gpu=cublasCDOTU(N,CX,1,CY,1)
+#endif
+   end function V_dot_V_c2_gpu
 #endif
    !
    !==============
diff --git a/src/modules/mod_wrapper_omp.F b/src/modules/mod_wrapper_omp.F
index 47669d8ee9..029ffdd528 100644
--- a/src/modules/mod_wrapper_omp.F
+++ b/src/modules/mod_wrapper_omp.F
@@ -43,6 +43,10 @@ module wrapper_omp
    module procedure V_dot_V_r1_omp, V_dot_V_c1_omp, V_dot_V_c2_omp
  end interface
  !
+ interface Vstar_dot_V_omp
+   module procedure V_dot_V_r1_omp, Vstar_dot_V_c1_omp, Vstar_dot_V_c2_omp
+ end interface
+ !
  public :: V_copy_omp
  public :: Vstar_dot_V_omp
  public :: V_dot_V_omp
@@ -283,9 +287,9 @@ end subroutine V_copy_omp
    ! DOT PRODUCTS
    !==============
    !
-   complex(SP) function Vstar_dot_V_omp(N,CX,CY)
+   complex(SP) function Vstar_dot_V_c1_omp(N,CX,CY)
      integer,    intent(in) :: N
-     complex(SP),intent(in) :: CX(*),CY(*)
+     complex(SP),intent(in) :: CX(:),CY(:)
      !
 #if defined _OPENMP_INT_LINALG
      integer     :: i
@@ -297,19 +301,45 @@ complex(SP) function Vstar_dot_V_omp(N,CX,CY)
          ctmp=ctmp +conjg(CX(i))*CY(i)
        enddo 
        !$omp end parallel do
-       Vstar_dot_V_omp=ctmp
+       Vstar_dot_V_c1_omp=ctmp
+     else
+       Vstar_dot_V_c1_omp=Vstar_dot_V(N,CX,CY)
+     endif
+#else
+     Vstar_dot_V_c1_omp=Vstar_dot_V(N,CX,CY)
+#endif
+     !
+   end function Vstar_dot_V_c1_omp
+   !
+   complex(SP) function Vstar_dot_V_c2_omp(N,CX,CY)
+     integer,    intent(in) :: N
+     complex(SP),intent(in) :: CX(:,:),CY(:,:)
+     !
+#if defined _OPENMP_INT_LINALG
+     integer     :: i,j
+     complex(SP) :: ctmp
+     if (n_threads_now>1) then
+       ctmp=0.0_SP
+       !$omp parallel do default(shared), private(i), reduction(+:ctmp), collapse(2)
+       do j=1,N/size(CX,1)
+       do i=1,size(CX,1)
+         ctmp=ctmp +conjg(CX(i,j))*CY(i,j)
+       enddo 
+       enddo 
+       !$omp end parallel do
+       Vstar_dot_V_c2_omp=ctmp
      else
-       Vstar_dot_V_omp=Vstar_dot_V(N,CX,CY)
+       Vstar_dot_V_c2_omp=Vstar_dot_V(N,CX,CY)
      endif
 #else
-     Vstar_dot_V_omp=Vstar_dot_V(N,CX,CY)
+     Vstar_dot_V_c2_omp=Vstar_dot_V(N,CX,CY)
 #endif
      !
-   end function Vstar_dot_V_omp   
+   end function Vstar_dot_V_c2_omp
    !
    real(SP) function V_dot_V_r1_omp(N,CX,CY)
      integer, intent(in) :: N
-     real(SP),intent(in) :: CX(*),CY(*)
+     real(SP),intent(in) :: CX(:),CY(:)
      !
 #if defined _OPENMP_INT_LINALG
      integer  :: i
@@ -332,7 +362,7 @@ end function V_dot_V_r1_omp
    !
    complex(SP) function V_dot_V_c1_omp(N,CX,CY)
      integer,    intent(in) :: N
-     complex(SP),intent(in) :: CX(*),CY(*)
+     complex(SP),intent(in) :: CX(:),CY(:)
      !
 #if defined _OPENMP_INT_LINALG
      integer     :: i
diff --git a/src/nloptics/.objects b/src/nloptics/.objects
index 1de603485f..708eaa3f0d 100644
--- a/src/nloptics/.objects
+++ b/src/nloptics/.objects
@@ -1,5 +1,6 @@
 #if defined _NL
-objs = NL_driver.o NL_initialize.o NL_start_and_restart.o el_density_vbands.o NL_Integrator.o NL_damping.o \
+objs = NL_driver.o NL_initialize.o NL_start_and_restart.o el_density_vbands.o NL_Integrator.o \
+       NL_damping.o NL_test_collisions.o \
        NL_Hamiltonian.o NL_output.o DIP_polarization.o NL_average_operator.o EXC_macroscopic_JGM.o \
        NL_databases_IO.o NL_build_dG_lesser.o NL_build_valence_bands.o EXP_step.o \
        NL_average_operator.o NL_current.o NL_Berry_current.o EXP_op.o INVINT_step.o RK_basestep.o 
diff --git a/src/nloptics/DOUBLE_project.dep b/src/nloptics/DOUBLE_project.dep
index fbac890c90..9c580b7e80 100644
--- a/src/nloptics/DOUBLE_project.dep
+++ b/src/nloptics/DOUBLE_project.dep
@@ -16,6 +16,7 @@
  NL_initialize.o
  NL_output.o
  NL_start_and_restart.o
+ NL_test_collisions.o
  RK_basestep.o
  el_density_vbands.o
 
diff --git a/src/nloptics/NL_Hamiltonian.F b/src/nloptics/NL_Hamiltonian.F
index b7df9a6327..d8988afed1 100644
--- a/src/nloptics/NL_Hamiltonian.F
+++ b/src/nloptics/NL_Hamiltonian.F
@@ -27,7 +27,7 @@ subroutine NL_Hamiltonian(E,k,q,X,Time,i_time,V_bands)
  use xc_functionals, ONLY:V_xc,XC_potential_driver
  use nl_optics,      ONLY:Correlation,V_xc_0,full_rho,IPA,E_full,LRC,JGM,LRCW,l_use_DIPOLES,NL_LRC_alpha,   &
 &                         NL_initial_P,E_tot,E_ext,E_ks,E_xc_0,I_relax,Alpha_ED,dG,eval_dG, & 
-&                         NL_bands,VAL_BANDS,NL_P
+&                         NL_bands,VAL_BANDS,NL_P,LSEX,LHF
  use global_XC,      ONLY:WF_xc_functional,WF_kind
  use parallel_m,     ONLY:PAR_IND_Xk_ibz,PAR_Xk_ibz_index
  use wrapper_omp,    ONLY:M_plus_alpha_M_omp
@@ -169,8 +169,10 @@ subroutine NL_Hamiltonian(E,k,q,X,Time,i_time,V_bands)
  ! Build the Hamiltonian
  ! =====================
  !
+!$OMP WORKSHARE  
  Ho_plus_Sigma=cZERO
  I_relax      =cZERO
+!$OMP END WORKSHARE 
  !
  do i_sp_pol=1,n_sp_pol
   do ik=1,QP_nk
@@ -217,10 +219,12 @@ subroutine NL_Hamiltonian(E,k,q,X,Time,i_time,V_bands)
      !
    endif                                                         
    !
+!   if(Correlation==LSEX.or.Correlation==LHF) call OSCLL_compose_nl(dG,H_nl_sc,k,q,E,ik,i_sp_pol)
+   if(Correlation==LSEX.or.Correlation==LHF) call OSCLL_compose_vbands(V_bands,H_nl_sc,k,q,E,ik,i_sp_pol)
+   !
    if(l_use_Hxc_collisions) then
      call M_plus_alpha_M_omp(RT_nbands,cONE,RT_Vnl_xc(NL_bands(1):NL_bands(2),NL_bands(1):NL_bands(2),ik,i_sp_pol),&
 &                                            H_nl_sc(NL_bands(1):NL_bands(2),NL_bands(1):NL_bands(2)))
-
    endif
    !
    ! Damping: NL_damping fills the I_relax matrix 
diff --git a/src/nloptics/NL_driver.F b/src/nloptics/NL_driver.F
index 5e8132231f..cba5a617ea 100644
--- a/src/nloptics/NL_driver.F
+++ b/src/nloptics/NL_driver.F
@@ -5,7 +5,7 @@
 !
 ! Authors (see AUTHORS file for details): MG CA
 !
-subroutine NL_driver(E,X,k,q,Dip)
+subroutine NL_driver(E,X,Xw,k,q,Dip)
  !
  use pars,           ONLY:pi,SP,cZERO,cONE,schlen
  use vec_operate,    ONLY:axis_rotation
@@ -14,7 +14,7 @@ subroutine NL_driver(E,X,k,q,Dip)
  use QP_m,           ONLY: QP_ng_SH,QP_ng_Sx 
  use LIVE_t,         ONLY:live_timing 
  use electrons,      ONLY:levels,n_sp_pol
- use collision_ext,  ONLY:COLLISIONS_CV_only 
+ use collision_ext,  ONLY:COLLISIONS_CV_only,HXC_collisions 
  use X_m,            ONLY:X_t
  use DIPOLES,        ONLY:DIPOLE_t,DIP_alloc,DIP_S
  use wave_func,      ONLY:WF,wf_ng
@@ -22,16 +22,18 @@ subroutine NL_driver(E,X,k,q,Dip)
  use hamiltonian,    ONLY:WF_Go_indx,H_QP_table_setup,H_potential
  use R_lattice,      ONLY:bz_samp,qindx_free
  use fields,         ONLY:Efield
+ use frequency,      ONLY:w_samp
  use QP_CTL_m,       ONLY:QP_apply
+ use QP_m,           ONLY:QP_nk
  use nl_optics,      ONLY:n_frequencies,Correlation,IPA,NL_alloc,NL_free,V_bands,eval_dG,NL_J,NL_initial_J,NL_initial_versor, &
 &                         NL_estep,NL_er,Integrator,l_use_DIPOLES,NL_P_t,JGM,TDDFT,NL_P,loop_on_angles,n_angles,&
 &                         E_ext_t,E_tot_t,E_ks_t,E_ks,E_tot,E_ext,runs_done,n_runs_todo,l_eval_CURRENT,NL_rot_axis,&
 &                         HF,SEX,NL_bands,NL_nbands,NL_alloc_k_distributed,NL_J_t,n_runs,loop_on_frequencies, &
-&                         l_pump_and_probe
+&                         l_pump_and_probe,eval_OSCLL,l_test_OSCLL,LSEX,LHF,Sigma_SEX_EQ
  use real_time,      ONLY:RT_step,NE_steps,NE_i_time,RT_nbands,RT_bands,eval_DFT,eval_HARTREE
  use IO_m,           ONLY:io_COLLs
  use parallel_int,   ONLY:PARALLEL_global_indexes,PARALLEL_WF_distribute,PARALLEL_WF_index
- use parallel_m,     ONLY:PAR_IND_freqs,PAR_IND_freqs_ID,PAR_IND_Xk_ibz
+ use parallel_m,     ONLY:PAR_IND_freqs,PAR_IND_freqs_ID,PAR_IND_Xk_ibz,PAR_Xk_ibz_index
  use collision_ext,  ONLY:COLLISIONS_have_HARTREE
  !
 #include<memory.h>
@@ -42,12 +44,13 @@ subroutine NL_driver(E,X,k,q,Dip)
  type(bz_samp)     :: k,q
  type(X_t)         :: X(2)
  type(DIPOLE_t)    :: Dip
+ type(w_samp)      :: Xw(2)
  !
  logical, external :: stop_now
  ! 
  ! Work Space
  !
- integer           :: i_f,i1,i_sp_pol,wf_ng_save,io_err
+ integer           :: i_f,i1,i_sp_pol,wf_ng_save,io_err,ik,ik_mem
  character(schlen) :: l_message
  logical           :: l_dynamics_is_over
  integer           :: ID_RESTART
@@ -79,7 +82,8 @@ subroutine NL_driver(E,X,k,q,Dip)
  !
  ! Dipoles & WF dimensions
  !=========================
- call WF_and_dipole_dimensions(X,k)
+
+ call WF_and_dipole_dimensions(X(1),k)
  !
  ! QP dimensions
  !================
@@ -87,10 +91,7 @@ subroutine NL_driver(E,X,k,q,Dip)
  !
  ! Non_Linear parallization
  ! ===================================
- call PARALLEL_global_indexes(E,k,q," ",RESET=.TRUE.,Dip=Dip,X=X(1))
- call PARALLEL_global_indexes(E,k,q,"Non_Linear",Dip=Dip,X=X(1))
- call PARALLEL_WF_distribute(K_index=PAR_IND_Xk_ibz,CLEAN_UP=.TRUE.)
- call PARALLEL_WF_index( )
+ call setup_NL_parallelization()
  !
  !Load Dipole after the NL parallelization
  ! ========================================
@@ -100,11 +101,16 @@ subroutine NL_driver(E,X,k,q,Dip)
  !
  ! Generate bare_qpg if required
  !=============================
- if(eval_HARTREE.or.CORRELATION==JGM) call col_driver(maxval((/wf_ng,QP_ng_Sx,QP_ng_SH/)),q)
+ if(eval_HARTREE.or.CORRELATION==JGM.or.CORRELATION==LSEX.or.CORRELATION==LHF) &
+&  call col_driver(maxval((/wf_ng,QP_ng_Sx,QP_ng_SH/)),q)
  !
  ! Free memory: qindx_* and unused DIPOLES
  ! =====================================================
- call qindx_free('ALL')
+ if(.not.eval_OSCLL) then
+   call qindx_free('ALL')
+ else
+   call qindx_free('X B C')
+ endif
  !
  call DIP_alloc('P_square')
  if(l_use_DIPOLES) then
@@ -122,16 +128,26 @@ subroutine NL_driver(E,X,k,q,Dip)
  wf_ng_save =wf_ng
  if(eval_DFT) then
    wf_ng      =max(QP_ng_Sx,QP_ng_SH)
+ elseif(eval_OSCLL) then
+   wf_ng      =max(wf_ng,QP_ng_Sx,QP_ng_SH)
  else
    wf_ng      =QP_ng_SH
  endif
  WF_Go_indx =1
  !
+ if(eval_OSCLL.and..not.l_test_OSCLL) then
+    io_COLLs=.false.
+    HXC_collisions%N=0
+ endif
+ !
  if(io_COLLs) then
    !      
    ! check if they exist      
    !
    call COLLISIONS_load(.true.)
+   !
+   if(l_test_OSCLL.and..not.l_use_Hxc_collisions) &
+&    call error("You should calculate collisions before the testing with: yambo_nl -e -v f/sex")          
    !
    if(.not.l_use_Hxc_collisions .and. (Correlation==SEX.or.Correlation==HF)) &
 &    call error(' HF / SEX need Collisions. Cannot be computed on the fly.')
@@ -152,8 +168,27 @@ subroutine NL_driver(E,X,k,q,Dip)
    !
  endif
  !
- if(eval_HARTREE.or.eval_DFT) & 
-&   call WF_load(WF,wf_ng,WF_Go_indx,(/1,NL_bands(2)/),(/1,k%nibz/),space='R',title='-NL')  
+ if(eval_OSCLL) then
+   !
+   call setup_OSCLL_parallelization()
+   !
+   ! Evaluate oscillators if not present
+   call OSCLL_eval(q, k, E)
+   !
+   ! Load the LSEX potential W(q,G,G') or V(q,G,G')
+   call LSEX_potential(q, X(2), Xw(2))
+   !
+   ! Load oscillators
+   !
+   call OSCLL_load(q,k)
+   !
+   call setup_NL_parallelization()
+   !
+ endif
+ !
+ if(eval_HARTREE.or.eval_DFT) then
+   call WF_load(WF,wf_ng,WF_Go_indx,(/1,NL_bands(2)/),(/1,k%nibz/),space='R',title='-NL')  
+ endif
  !
  ! Load Collisions if necessary
  ! ============================
@@ -171,11 +206,33 @@ subroutine NL_driver(E,X,k,q,Dip)
  call NL_alloc(E) 
  call NL_alloc_k_distributed(E)
  !
+ if(l_test_OSCLL) then
+   call NL_test_collisions(k,q)
+   return
+ endif
+ !
  ! Initialization
  ! ==================
  call section('*','NL Start and Restart')
  call NL_start_and_restart(E,k,Dip)
  !
+ !
+ if(eval_OSCLL) then
+   !      
+   ! If I use oscillators I store the equilibrium part of long-range SEX
+   !
+   Sigma_SEX_EQ=cZERO
+   !
+   do ik=1,QP_nk
+     if (.not.PAR_IND_Xk_ibz%element_1D(ik)) cycle
+     ik_mem=PAR_Xk_ibz_index(ik)
+     do i_sp_pol=1,n_sp_pol
+       call OSCLL_compose_vbands(V_bands,Sigma_SEX_EQ(:,:,i_sp_pol,ik_mem),k,q,E,ik,i_sp_pol) 
+     enddo
+   enddo
+   !
+ endif
+ !
  ! Check if calculation was already done
  ! =====================================
  if(n_runs_todo==0) return
@@ -278,4 +335,40 @@ subroutine NL_driver(E,X,k,q,Dip)
  call DIP_alloc('DIP_R')
  call DIP_alloc('DIP_v')
  !
+ contains
+ !
+ subroutine setup_OSCLL_parallelization
+   use parallel_m,       ONLY:PAR_IND_G_b
+   implicit none
+   !
+   ! Restore original number of bands
+   !
+   X(1)%ib       =(/NL_bands(1),NL_bands(2)/)
+   !
+   call PARALLEL_global_indexes(E,k,q," ",RESET=.TRUE.,X=X(1))
+   call PARALLEL_global_indexes(E,k,q,"Oscillators",X=X(1))
+   call PARALLEL_WF_distribute(B_index=PAR_IND_G_b,CLEAN_UP=.TRUE.)
+   call PARALLEL_WF_index( )
+   !
+ end subroutine setup_OSCLL_parallelization
+ !
+ subroutine setup_NL_parallelization()
+   use parallel_m,       ONLY:PAR_IND_Xk_ibz
+   !
+   implicit none
+   !
+   ! Restore original number of bands
+   !
+   X(1)%ib       =(/NL_bands(1),NL_bands(2)/)
+   !
+   call PARALLEL_global_indexes(E,k,q," ",RESET=.TRUE.)
+   call PARALLEL_WF_distribute(CLEAN_UP=.TRUE.)
+   ! 
+   call PARALLEL_global_indexes(E,k,q,"Non_Linear",Dip=Dip,X=X(1))
+   call PARALLEL_WF_distribute(K_index=PAR_IND_Xk_ibz,CLEAN_UP=.TRUE.)
+   call PARALLEL_WF_index( )
+   !
+ end subroutine setup_NL_parallelization
+
+ !
 end subroutine NL_driver
diff --git a/src/nloptics/NL_initialize.F b/src/nloptics/NL_initialize.F
index 4e0291123f..dadb8ffdbd 100644
--- a/src/nloptics/NL_initialize.F
+++ b/src/nloptics/NL_initialize.F
@@ -17,15 +17,17 @@ subroutine NL_initialize(E,k)
  use X_m,            ONLY:global_gauge
  use zeros,          ONLY:zero_dfl
  use com,            ONLY:msg
+ use QP_m,           ONLY:QP_ng_Sc,QP_ng_Sx,QP_ng_Sh
+ use collision_ext,  ONLY:ng_oscll
  use R_lattice,      ONLY:bz_samp,k_map
  use fields,         ONLY:Efield,Efield_strength,n_ext_fields,EtoT
  use real_time,      ONLY:RT_step,NE_steps,NE_tot_time,eval_HARTREE,eval_DFT,l_RT_induced_field,l_NE_with_fields, &
 &                         Phase_LifeTime,Integrator_name,RT_dyn_step,NE_i_time,RT_potential,RAD_LifeTime
  use RT_control,     ONLY:SETUP_RT_IO_type,OBS_RT_IO_t
  use nl_optics,      ONLY:l_use_DIPOLES,EULER,EULEREXP,RK2,RK4,RK2EXP,RK4EXP,HEUN,INVINT,Integrator,CRANK_NICOLSON, &
-&                         NL_damping,NL_correlation,Correlation,NL_er,NL_estep,n_frequencies,IPA,TDDFT,HARTREE,LRC,JGM, &
-&                         LRCW,SEX,HF,eval_dG,eval_COLLISIONS,NL_bands,NL_verb_name,VERB_HIGH,VERB_LOW,NL_verbosity, &
-&                         l_eval_CURRENT,l_pump_and_probe,DephMult,l_velocity_IPA,n_angles, &
+&                         NL_damping,NL_correlation,Correlation,NL_er,NL_estep,n_frequencies,IPA,TDDFT,HARTREE,LRC,JGM,  &
+&                         LRCW,SEX,HF,eval_dG,eval_COLLISIONS,NL_bands,NL_verb_name,VERB_HIGH,VERB_LOW,NL_verbosity,LSEX,&
+&                         l_eval_CURRENT,l_pump_and_probe,DephMult,l_velocity_IPA,n_angles,l_test_OSCLL,LHF,eval_OSCLL,  &
 &                         NL_ang_step,n_runs,NL_rot_axis,loop_on_frequencies,loop_on_angles,NL_initial_versor
  use electric,       ONLY:l_force_SndOrd,l_P_periodic
  use parser_m,       ONLY:parser
@@ -50,14 +52,11 @@ subroutine NL_initialize(E,k)
  call parser('InducedField' ,        l_RT_induced_field)
  call parser('EvalCurrent' ,         l_eval_CURRENT)
  call parser('FrPolPerdic' ,         l_P_periodic)
+ call parser('TestOSCLL',            l_test_OSCLL)
  call parser('NoComprCOLL',          l_no_compress_COLL)
  !
- COLLISIONS_load_SP =.not.l_no_compress_COLL
- COLLISIONS_compr   =.not.l_no_compress_COLL
- !
  if(l_use_DIPOLES) call warning(' Fixed dipoles: only linear response properties are correct!')
  if(l_P_periodic)  call msg('sr','[NL] Enforce periodicity of the polarization repect to Efield')
- if(SP==DP.and.COLLISIONS_load_SP) call msg('sr','COLLISIONS in single precisions')
  !
  ! Velocity gauge mode
  ! =======================================
@@ -281,6 +280,7 @@ subroutine NL_initialize(E,k)
  eval_DFT       =.false.
  eval_dG        =.false.
  eval_COLLISIONS=.false.
+ eval_OSCLL     =.false.
  !
  ! NB.: This part duplicates what is in
  !      INIT.F 
@@ -322,6 +322,22 @@ subroutine NL_initialize(E,k)
         eval_COLLISIONS=.true.
         l_sc_coh       =.true.
         l_sc_sex       =.true.
+    case('LSEX')
+        Correlation    = LSEX
+        RT_potential   ='LSEX'
+        eval_HARTREE   =.true.
+        ! Turn off Hartree if required
+        if(QP_ng_SH<=1) eval_HARTREE=.false.
+        eval_OSCLL     =.true.
+        eval_dG        =.false.
+    case('LHF')
+        Correlation    = LHF
+        RT_potential   ='LHF'
+        eval_HARTREE   =.true.
+        ! Turn off Hartree if required
+        if(QP_ng_SH<=1) eval_HARTREE=.false.
+        eval_OSCLL     =.true.
+        eval_dG        =.false.
     case('HF')
         Correlation=HF
         RT_potential   ='HARTREE+FOCK'
@@ -334,6 +350,50 @@ subroutine NL_initialize(E,k)
         call error("Unknow correlation in the response function")
  end select
  !
+ call msg('rs','Correlation in the reponse functions: '//trim(NL_correlation))
+ !
+ if(l_test_OSCLL.and.(Correlation==LSEX.or.Correlation==LHF)) then
+   !      
+   eval_COLLISIONS=.TRUE.
+   eval_dG        =.TRUE.
+   !
+   ! I force loading collision for testing porpouse
+   !
+   if(Correlation==LHF) then
+     RT_potential='FOCK'
+   elseif(Correlation==LSEX) then
+     RT_potential='SEX'
+   endif
+   !
+   call msg('s','')
+   call msg('s','*****************************************')
+   call msg('s','**** TEST TEST Oscillators TEST TEST ****')
+   call msg('s','*****************************************')
+   call msg('s','')
+   !
+ endif
+ !
+ if(eval_COLLISIONS) then
+   COLLISIONS_load_SP =.not.l_no_compress_COLL
+   COLLISIONS_compr   =.not.l_no_compress_COLL
+   if(SP==DP.and.COLLISIONS_load_SP) call msg('sr','COLLISIONS in single precisions')
+ endif
+ !   
+ ! Set the number of g-vectors for oscillators and W(q,G,G') or V(q,G)
+ !
+ if(Correlation==LHF.or.Correlation==LSEX) then
+   if(Correlation==LSEX.and.QP_ng_Sx/=QP_ng_Sc) &
+    &   call error(' EXXRLvcs /= CORRLvcs not possible with oscillator, set the same value in input') 
+   ng_oscll=QP_ng_Sx
+   call msg('rs','Number of G-vectors in oscillators :',ng_oscll)
+ endif
+ !
+ if(QP_ng_SH<=1) then
+   call msg('rs','Hartree term turned off ')
+ else
+   call msg('rs','Number of G-vectors in Hartree   :',QP_ng_SH)
+ endif
+ !
  if(eval_COLLISIONS) then
    !      
    if(COLLISIONS_CV_only) call warning(' ONLY cv scattering in COLLISIONS! ')
@@ -345,7 +405,6 @@ subroutine NL_initialize(E,k)
    endif
  endif
  !
- ! 
  H_potential = RT_potential
  !
  ! Frequencies or Angle range
diff --git a/src/nloptics/NL_project.dep b/src/nloptics/NL_project.dep
index fbac890c90..9c580b7e80 100644
--- a/src/nloptics/NL_project.dep
+++ b/src/nloptics/NL_project.dep
@@ -16,6 +16,7 @@
  NL_initialize.o
  NL_output.o
  NL_start_and_restart.o
+ NL_test_collisions.o
  RK_basestep.o
  el_density_vbands.o
 
diff --git a/src/nloptics/NL_start_and_restart.F b/src/nloptics/NL_start_and_restart.F
index 9c4f8c0000..59521e3339 100644
--- a/src/nloptics/NL_start_and_restart.F
+++ b/src/nloptics/NL_start_and_restart.F
@@ -20,7 +20,7 @@ subroutine NL_start_and_restart(E,k,Dip)
  use pars,           ONLY:cZERO,rZERO,cONE,lchlen
  use com,            ONLY:msg
  use electrons,      ONLY:levels,n_sp_pol
- use real_time,      ONLY:rho_reference
+ use real_time,      ONLY:rho_reference,eval_HARTREE,eval_DFT
  use R_lattice,      ONLY:bz_samp
  use X_m,            ONLY:X_t
  use DIPOLES,        ONLY:DIPOLE_t
@@ -71,7 +71,7 @@ subroutine NL_start_and_restart(E,k,Dip)
    forall(i1=1:E%nbf(n_sp_pol)) V_bands(i1,i1,:,i_sp_pol)=cONE
  enddo
  !
- if(Correlation/=IPA.and..not.COLLISIONS_have_HARTREE) then
+ if(Correlation/=IPA.and..not.COLLISIONS_have_HARTREE.and.(eval_HARTREE.or.eval_DFT)) then
    !  
    call NL_build_valence_bands(E,V_bands,VAL_BANDS,1)
    call el_density_vbands(E,k,full_rho,VAL_BANDS)
diff --git a/src/nloptics/NL_test_collisions.F b/src/nloptics/NL_test_collisions.F
new file mode 100644
index 0000000000..c1a8b93a24
--- /dev/null
+++ b/src/nloptics/NL_test_collisions.F
@@ -0,0 +1,92 @@
+!
+!        Copyright (C) 2000-2017 the LUMEN team
+!
+! Authors (see AUTHORS file for details): MG CA
+! 
+! This file is distributed under the terms of the GNU 
+! General Public License. You can redistribute it and/or 
+! modify it under the terms of the GNU General Public 
+! License as published by the Free Software Foundation; 
+! either version 2, or (at your option) any later version.
+!
+! This program is distributed in the hope that it will 
+! be useful, but WITHOUT ANY WARRANTY; without even the 
+! implied warranty of MERCHANTABILITY or FITNESS FOR A 
+! PARTICULAR PURPOSE.  See the GNU General Public License 
+! for more details.
+!
+! You should have received a copy of the GNU General Public 
+! License along with this program; if not, write to the Free 
+! Software Foundation, Inc., 59 Temple Place - Suite 330,Boston, 
+! MA 02111-1307, USA or visit http://www.gnu.org/copyleft/gpl.txt.
+!
+subroutine NL_test_collisions(k,q) 
+ !
+ ! Test-collisions built on the fly from V/W 
+ !                   VS
+ ! collisions stored on disk (yambo_nl -e -v sex/f)
+ !
+ use pars,           ONLY:cZERO,SP,rZERO
+ use R_lattice,      ONLY:nXkibz,bz_samp,qindx_S
+ use collision_ext,  ONLY:HXC_COLL_element,HXC_collisions
+ use nl_optics,      ONLY:NL_bands
+ use com,            ONLY:msg
+ !
+ implicit none
+ ! 
+ type(bz_samp), intent(in)  :: k,q
+ !
+ integer :: i_kmq,i_qp,ib,ibp,i_coll,i_coll_mem
+ integer :: i_k,i_n,i_m,i_spin
+ integer :: ic1
+ complex(SP) :: COLL(NL_bands(1):NL_bands(2),NL_bands(1):NL_bands(2),nXkibz)
+ complex(SP) :: COLL_OSCLL(NL_bands(1):NL_bands(2),NL_bands(1):NL_bands(2),nXkibz)
+ ! 
+ real(SP) :: max_diff,diff
+ !
+ call msg('s','Collisions Test COLL vs OSCLL')
+ call msg('s','')
+ !
+ max_diff=rZERO
+ !
+ do i_coll=1,HXC_collisions%N
+   !
+   i_k       =HXC_collisions%state(i_coll,3)
+   i_n       =HXC_collisions%state(i_coll,1)
+   i_m       =HXC_collisions%state(i_coll,2)
+   i_spin    =HXC_collisions%state(i_coll,4)
+   i_coll_mem=HXC_collisions%PAR_map(i_coll)
+   !
+   ! Spin not supported for the moment
+   !
+   call OSCLL_compose_collision(k,q,i_n,i_m,i_k,COLL_OSCLL) 
+   !
+   ic1=1
+   !
+   COLL=cZERO
+   !
+   do i_kmq=1,nXkibz
+     !
+     do ibp=NL_bands(1),NL_bands(2)
+       do ib=NL_bands(1),NL_bands(2)
+         if (HXC_COLL_element(i_coll_mem)%table(ib,ibp,i_kmq)=="t") then
+           COLL(ib,ibp,i_kmq)=HXC_COLL_element(i_coll_mem)%v_c(ic1)
+           ic1=ic1+1
+         endif
+         !
+         diff=abs(COLL(ib,ibp,i_kmq)-COLL_OSCLL(ib,ibp,i_kmq))
+         write(*,'(6i5,2e14.6)') i_k,i_m,i_n,i_kmq,ib,ibp,abs(COLL(ib,ibp,i_kmq)),abs(COLL_OSCLL(ib,ibp,i_kmq))
+         if(max_diff<=diff) max_diff=diff
+         !
+       enddo
+     enddo
+     !
+   enddo
+   !
+ enddo 
+ !
+ call msg('s','')
+ call msg('s','Max error in collisions : ',max_diff)
+ call msg('s','')
+ !
+end subroutine NL_test_collisions
diff --git a/src/nloptics/el_density_vbands.F b/src/nloptics/el_density_vbands.F
index c2f6a35aae..896f4b373b 100644
--- a/src/nloptics/el_density_vbands.F
+++ b/src/nloptics/el_density_vbands.F
@@ -47,21 +47,23 @@ subroutine el_density_vbands(en,Xk,rho,VAL_BANDS)
  integer     :: i1,ik,is,rho_syms,ir,ik_mem
  real(SP)    :: rho_no_sym(fft_size)
  !
+!$OMP WORKSHARE  
  rho=rZERO
  rho_no_sym=rZERO
+!$OMP END WORKSHARE 
  !
  do ik=1,Xk%nibz
    !  
    if (.not.PAR_IND_Xk_ibz%element_1D(ik)) cycle
    ik_mem=PAR_Xk_ibz_index(ik)
    !
-!$omp parallel do default(shared), private(ir,is)
-   do ir=1,fft_size
-     do is=1,n_sp_pol
+   do is=1,n_sp_pol
+!$OMP WORKSHARE  
+     forall(ir=1:fft_size)
        rho_no_sym(ir)=rho_no_sym(ir)+real(spin_occ,SP)*Xk%weights(ik)*sum(abs(VAL_BANDS(ir,:en%nbf(is),ik_mem,is))**2._SP)
-     enddo
+     end forall
+!$OMP END WORKSHARE  
    enddo
-!$omp end parallel do
    !
  enddo
  !
@@ -69,20 +71,18 @@ subroutine el_density_vbands(en,Xk,rho,VAL_BANDS)
  !
  ! Simmetrization
  !
+!$OMP WORKSHARE  
  rho_syms=nsym/(i_time_rev+1)
+!$OMP END WORKSHARE  
  !
-!$omp parallel do default(shared), private(ir,i1)
- do ir=1,fft_size
-   do i1=1,rho_syms
-     rho(ir)=rho(ir)+real(rho_no_sym(fft_rot_r(ir,i1)),SP)/real(nsym,SP)
-   enddo
- enddo
-!$omp end parallel do
+!$OMP WORKSHARE  
+ forall(ir=1:fft_size)
+   rho(ir)=rho(ir)+sum(rho_no_sym(fft_rot_r(ir,1:rho_syms)))/real(nsym,SP)
+ end forall
+!$OMP END WORKSHARE  
  !
-!$omp parallel do default(shared), private(ir)
- do ir=1,fft_size
-   rho(ir)=(1._SP+i_time_rev)*rho(ir)
- enddo
-!$omp end parallel do
+!$OMP WORKSHARE  
+ rho=real(1._SP+i_time_rev,SP)*rho
+!$OMP END WORKSHARE  
  !
 end subroutine
diff --git a/src/output/K_OUTPUT.F b/src/output/K_OUTPUT.F
index feb4ac07ee..25dad42431 100644
--- a/src/output/K_OUTPUT.F
+++ b/src/output/K_OUTPUT.F
@@ -14,12 +14,12 @@ subroutine K_OUTPUT(iq,W,WHAT,OBS)
 &                         diam_term_exact,para_term_w0,l_abs_prop_chi_bse,&
 &                         l_eels_from_inversion,Co_factor
  use BS,             ONLY:BSE_mode,BS_K_is_ALDA,BS_H_dim,BS_K_coupling,&
-&                         l_BS_anomalous_Hall
+&                         l_BS_anomalous_Hall,l_BS_Esort_indx
  use stderr,         ONLY:STRING_match
  use X_m,            ONLY:Epsilon_ii,eps_2_alpha,X_OUTPUT_driver,Epsilon_ij,&
 &                         X_do_obs,X_OUTPUT_messages,X_dichroism,Joint_DOS,BS_E_sorted,N_BS_E_sorted,&
-&                         X_magnons,i_G_shift,Q_plus_G_pt,Q_plus_G_sq_modulus,Resp_ii,Resp_ij,&
-&                         l_drude,skip_cond_sum_rule,X_drude_term,global_gauge
+&                         BS_E_sorted_indx,X_magnons,i_G_shift,Q_plus_G_pt,Q_plus_G_sq_modulus,&
+&                         Resp_ii,Resp_ij,l_drude,skip_cond_sum_rule,X_drude_term,global_gauge
  use com,            ONLY:msg,com_compose_msg
  use R_lattice,      ONLY:bare_qpg,FineGd_desc,nkbz
  use electrons,      ONLY:spin_occ
@@ -35,7 +35,7 @@ subroutine K_OUTPUT(iq,W,WHAT,OBS)
  ! Work Space
  !
  character(2)      :: ordering
- integer           :: id,i_rsp,n_resp,iw_ref
+ integer           :: id,it,i_rsp,n_resp,iw_ref,index_sort(5)
  character(10)     :: solver
  character(schlen) :: out_string
  complex(SP)       :: hall(3,2),tmp_var(BSS_n_freqs,3),&
@@ -246,6 +246,18 @@ subroutine K_OUTPUT(iq,W,WHAT,OBS)
      if (data_sort(1)==0._SP) cycle
      call X_OUTPUT_driver("WRITE E_IP",data=data_sort(:N_BS_E_sorted))
    enddo
+   if (l_BS_esort_indx) then
+   do id=1,min(BS_H_dim,1000)
+     data_sort(1)=real(BS_E_sorted(id,1,1),SP)
+     if (data_sort(1)==0._SP) cycle
+     do it=1,18
+       if (BS_E_sorted_indx(id,it,1)==0) exit
+       data_sort(1)=real(BS_E_sorted(id,1,1),SP)
+       index_sort(1:5)=real(BS_E_sorted_indx(id,it,1:5),SP)
+       call X_OUTPUT_driver("WRITE E_INDX_IP",data=data_sort(1:1),indexes=index_sort)
+     enddo
+   enddo
+   endif
  endif
  !
  ! E sorted
diff --git a/src/output/X_OUTPUT_driver.F b/src/output/X_OUTPUT_driver.F
index d35a546030..de20ff34f0 100644
--- a/src/output/X_OUTPUT_driver.F
+++ b/src/output/X_OUTPUT_driver.F
@@ -5,7 +5,7 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
-subroutine X_OUTPUT_driver(WHAT,keys,solver,IQ,IG,DESC,data)
+subroutine X_OUTPUT_driver(WHAT,keys,solver,IQ,IG,DESC,data,indexes)
  !
  use pars,          ONLY:schlen,SP
  use D_lattice,     ONLY:l_0D,l_1D,l_2D
@@ -14,7 +14,7 @@ subroutine X_OUTPUT_driver(WHAT,keys,solver,IQ,IG,DESC,data)
  use electrons,     ONLY:n_spinor
  use PHOTOLUM,      ONLY:PL
  use BS,            ONLY:l_BS_kerr,l_BS_magnons,l_BS_dichroism,BS_K_coupling,&
-&                        l_BS_photolum,l_BS_abs,l_BS_jdos,l_BS_esort
+&                        l_BS_photolum,l_BS_abs,l_BS_jdos,l_BS_esort_indx,l_BS_esort
  use BS_solvers,    ONLY:l_abs_prop_chi_bse,l_eels_can_be_computed
  use stderr,         ONLY:STRING_match,STRING_split,intc,STRING_remove
  use OUTPUT,         ONLY:OUTPUT_driver
@@ -30,6 +30,7 @@ subroutine X_OUTPUT_driver(WHAT,keys,solver,IQ,IG,DESC,data)
  integer,      optional :: IQ,IG
  type(IO_desc),optional :: DESC
  real(SP),     optional :: data(:)
+ integer,      optional :: indexes(:)
  !
  ! Work Space
  integer            ::it,itp,ik,ic
@@ -39,20 +40,21 @@ subroutine X_OUTPUT_driver(WHAT,keys,solver,IQ,IG,DESC,data)
  !
  if (STRING_match(WHAT,"DEF")) then
    call ELEMENTAL_init(1,"fxc","Exchange-Correlation Kernel","optics")
-   call ELEMENTAL_init(2,"E_IP","Sorted IP Energies","sort")
-   call ELEMENTAL_init(3,"Esort","Sorted Interacting Energies","sort")
-   call ELEMENTAL_init(4,"pl","Photoluminescence","PL")
-   call ELEMENTAL_init(5,"eps","Absorption","optics")
-   call ELEMENTAL_init(6,"refl","Reflectivity","optics")
-   call ELEMENTAL_init(7,"eel","Electron Energy Loss","optics")
-   call ELEMENTAL_init(8,"alpha","Polarizability  ( -<<X>> )","optics")
-   call ELEMENTAL_init(9,"off","Off-diagonal Absorption","kerr")
-   call ELEMENTAL_init(10,"beta","Off-diagonal Polarizability","kerr")
-   call ELEMENTAL_init(11,"moke","MOKE parameters","kerr")
-   call ELEMENTAL_init(12,"jdos","Joint Density of states","optics")
-   call ELEMENTAL_init(13,"CD","Dichroism","dichroism")
-   call ELEMENTAL_init(14,"Spm","Magnons (S+-)","magnons")
-   call ELEMENTAL_init(15,"Smp","Magnons (S-p)","magnons")
+   call ELEMENTAL_init(2,"E_INDX_IP","Sorted IP Energies indexes","indexes")
+   call ELEMENTAL_init(3,"E_IP","Sorted IP Energies","sort")
+   call ELEMENTAL_init(4,"Esort","Sorted Interacting Energies","sort")
+   call ELEMENTAL_init(5,"pl","Photoluminescence","PL")
+   call ELEMENTAL_init(6,"eps","Absorption","optics")
+   call ELEMENTAL_init(7,"refl","Reflectivity","optics")
+   call ELEMENTAL_init(8,"eel","Electron Energy Loss","optics")
+   call ELEMENTAL_init(9,"alpha","Polarizability  ( -<<X>> )","optics")
+   call ELEMENTAL_init(10,"off","Off-diagonal Absorption","kerr")
+   call ELEMENTAL_init(11,"beta","Off-diagonal Polarizability","kerr")
+   call ELEMENTAL_init(12,"moke","MOKE parameters","kerr")
+   call ELEMENTAL_init(13,"jdos","Joint Density of states","optics")
+   call ELEMENTAL_init(14,"CD","Dichroism","dichroism")
+   call ELEMENTAL_init(15,"Spm","Magnons (S+-)","magnons")
+   call ELEMENTAL_init(16,"Smp","Magnons (S-p)","magnons")
  endif
  !
  if (STRING_match(WHAT,"INIT")) then
@@ -77,7 +79,8 @@ subroutine X_OUTPUT_driver(WHAT,keys,solver,IQ,IG,DESC,data)
      if (l_flag) call X_obs_onoff("+ refl") 
    endif
    if (l_BS_jdos) call X_obs_onoff("+ jdos")
-   if (l_BS_esort) call X_obs_onoff("+ Esort E_IP")
+   if (l_BS_esort)      call X_obs_onoff("+ Esort E_IP")
+   if (l_BS_esort_indx) call X_obs_onoff("+ E_INDX_IP")
    if (l_BS_kerr) then
      if (X_do_obs("alpha")) call X_obs_onoff("+ beta")
      if (X_do_obs("eps"))   call X_obs_onoff("+ off moke")
@@ -98,9 +101,9 @@ subroutine X_OUTPUT_driver(WHAT,keys,solver,IQ,IG,DESC,data)
 	 call warning("eps & alpha evaluation switched off for slepc")
        endif
      endif
-     if (STRING_match(solver,"Inversion")) call X_obs_onoff("- Esort E_IP kerr magnons jdos")
-     if (STRING_match(solver,"Haydock")) call X_obs_onoff("- Esort jdos E_IP")
-     if (STRING_match(solver,"Diago").or.STRING_match(solver,"Slepc")) call X_obs_onoff("- E_IP")
+     if (STRING_match(solver,"Inversion")) call X_obs_onoff("- Esort E_IP E_INDX_IP kerr magnons jdos")
+     if (STRING_match(solver,"Haydock")) call X_obs_onoff("- Esort jdos E_IP E_INDX_IP")
+     if (STRING_match(solver,"Diago").or.STRING_match(solver,"Slepc")) call X_obs_onoff("- E_IP E_INDX_IP")
    endif
    !
    ! Headers...
@@ -111,14 +114,18 @@ subroutine X_OUTPUT_driver(WHAT,keys,solver,IQ,IG,DESC,data)
      X_obs(1)%COL_header(1)   = 'E'
      X_obs(1)%COL_header(2:3) = (/'Re(q^2Fxc)','Re(q^2Fxc)'/)
    endif
+   if (X_do_obs("indexes")) then
+     X_obs(2)%N_columns=6
+     X_obs(2)%COL_header(1:6)  = (/' E  ','ikbz','ib1 ','ib2 ','isp1','isp2'/)
+   endif
    if (X_do_obs("Esort")) then
-     do ic=2,3
+     do ic=3,4
        X_obs(ic)%N_columns=2
        X_obs(ic)%COL_header(1:2)  = (/'E    ','N_deg'/)
      enddo
      if (BS_K_coupling) then
-       X_obs(3)%N_columns=3
-       X_obs(3)%COL_header(1:3)  = (/'Re(E)','Im(E)','N_deg'/)
+       X_obs(4)%N_columns=3
+       X_obs(4)%COL_header(1:3)  = (/'Re(E)','Im(E)','N_deg'/)
      endif
      obs_with_res=[character(5) :: "eps","alpha","Spm","Smp","off","beta","CD" ]
      do it=1,N_X_obs
@@ -128,16 +135,16 @@ subroutine X_OUTPUT_driver(WHAT,keys,solver,IQ,IG,DESC,data)
          l_print_res=l_print_res.or.trim(X_obs(it)%what)==trim(obs_with_res(itp))
        enddo
        if (.not.l_print_res) cycle
-       do ic=2,3
+       do ic=3,4
          X_obs(ic)%N_columns=X_obs(ic)%N_columns+1
          X_obs(ic)%COL_header(X_obs(ic)%N_columns) =trim(X_obs(it)%what) 
        enddo
      enddo
    endif
    if (X_do_obs("PL")) then
-     X_obs(4)%N_columns=3
-     X_obs(4)%COL_header(1)   = 'E'
-     X_obs(4)%COL_header(2:3) = (/'PL  ','PL_o'/)
+     X_obs(5)%N_columns=3
+     X_obs(5)%COL_header(1)   = 'E'
+     X_obs(5)%COL_header(2:3) = (/'PL  ','PL_o'/)
    endif
    !
    ! ... pre-formatted
@@ -145,7 +152,7 @@ subroutine X_OUTPUT_driver(WHAT,keys,solver,IQ,IG,DESC,data)
    BASIC_col(2:3) = (/'Im(XXX)', 'Re(XXX)'/)
    BASIC_col(4:5) = (/'Im(XXX_o)', 'Re(XXX_o)'/)
    BASIC_col(6:7) = (/'Im(XXX`)','Re(XXX`)'/)
-   do it=5,N_X_obs
+   do it=6,N_X_obs
      X_obs(it)%N_columns=7 
      do ic=1,X_obs(it)%N_columns
        X_obs(it)%COL_header(ic)=STRING_remove(BASIC_col(ic),"XXX",trim(X_obs(it)%what))
@@ -198,6 +205,11 @@ subroutine X_OUTPUT_driver(WHAT,keys,solver,IQ,IG,DESC,data)
      do ic=2,size(data)
        call OUTPUT_driver(trim(X_obs(it)%prefix),R_VALUES=data(ic:ic),TITLES=X_obs(it)%COL_header(ic:ic))
      enddo
+     if (present(indexes)) then
+       do ic=1,size(indexes)
+         call OUTPUT_driver(trim(X_obs(it)%prefix),I_VALUES=indexes(ic:ic),TITLES=X_obs(it)%COL_header(ic+1:ic+1))
+       enddo
+     endif
      call OUTPUT_driver(trim(X_obs(it)%prefix),action="write")
    enddo
  endif
diff --git a/src/parallel/.objects b/src/parallel/.objects
index 4d1c143fe4..c901f08ddc 100644
--- a/src/parallel/.objects
+++ b/src/parallel/.objects
@@ -1,6 +1,6 @@
 NL_objs =
 #if defined _NL
-NL_objs = PARALLEL_global_Non_Linear.o
+NL_objs = PARALLEL_global_Non_Linear.o 
 #endif
 #if _RT 
 RT_objs = PARALLEL_global_Real_Time.o
@@ -24,7 +24,7 @@ objs =  check_for_a_redundant_IO.o \
         PARALLEL_global_BZINDX.o \
         PARALLEL_global_defaults.o PARALLEL_global_reset.o PARALLEL_global_dimensions.o PARALLEL_global_ScaLapacK.o \
         PARALLEL_global_Response_G.o PARALLEL_global_Self_Energy.o PARALLEL_global_Response_T.o PARALLEL_global_Response_T_transitions.o \
-        PARALLEL_global_Response_IO.o PARALLEL_global_DIPOLES.o \
+        PARALLEL_global_Response_IO.o PARALLEL_global_DIPOLES.o PARALLEL_global_Oscillators.o \
         PARALLEL_global_indexes.o \
         PARALLEL_index.o PARALLEL_WF_distribute.o \
         PARALLEL_Haydock_VEC_COMMs.o PARALLEL_FREQS_setup.o $(RT_objs) $(NL_objs)
diff --git a/src/parallel/DOUBLE_project.dep b/src/parallel/DOUBLE_project.dep
index 5c51f98854..1e88f54b17 100644
--- a/src/parallel/DOUBLE_project.dep
+++ b/src/parallel/DOUBLE_project.dep
@@ -19,6 +19,7 @@
  PARALLEL_global_BZINDX.o
  PARALLEL_global_DIPOLES.o
  PARALLEL_global_Non_Linear.o
+ PARALLEL_global_Oscillators.o
  PARALLEL_global_Real_Time.o
  PARALLEL_global_Response_G.o
  PARALLEL_global_Response_IO.o
diff --git a/src/parallel/PARALLEL_global_Non_Linear.F b/src/parallel/PARALLEL_global_Non_Linear.F
index 7b33d6fa8b..e18fd65e70 100644
--- a/src/parallel/PARALLEL_global_Non_Linear.F
+++ b/src/parallel/PARALLEL_global_Non_Linear.F
@@ -5,7 +5,7 @@
 !
 ! Authors (see AUTHORS file for details): CA MG DS
 !
-subroutine PARALLEL_global_Non_Linear(E,Xk,q,ENVIRONMENT)
+subroutine PARALLEL_global_Non_Linear(E,Xk,q)
  !
  use electrons,     ONLY:levels
  use R_lattice,     ONLY:bz_samp
@@ -35,7 +35,6 @@ subroutine PARALLEL_global_Non_Linear(E,Xk,q,ENVIRONMENT)
  !
  type(levels)         :: E
  type(bz_samp)        :: Xk,q
- character(*)         :: ENVIRONMENT
  !
  ! Work space
  !
@@ -59,7 +58,7 @@ subroutine PARALLEL_global_Non_Linear(E,Xk,q,ENVIRONMENT)
  call PARALLEL_index(PAR_IND_freqs,(/n_runs/),COMM=PAR_COM_freqs_INDEX)
  PAR_IND_freqs_ID=PAR_COM_freqs_INDEX%CPU_id
  PAR_n_freqs     =PAR_IND_freqs%n_of_elements(PAR_IND_Q_ibz_ID+1)
- call PARALLEL_live_message("Freqs",ENVIRONMENT=ENVIRONMENT,         &
+ call PARALLEL_live_message("Freqs",ENVIRONMENT="Non_Linear",         &
 &                           LOADED=PAR_IND_freqs%n_of_elements(PAR_IND_freqs_ID+1), &
 &                           TOTAL=n_runs,NCPU=PAR_COM_freqs_INDEX%n_CPU)
  !
@@ -87,7 +86,7 @@ subroutine PARALLEL_global_Non_Linear(E,Xk,q,ENVIRONMENT)
  !
  ! Overlap indeces for WF distribution
  !
- call PARALLEL_live_message("H(ibz)",ENVIRONMENT=ENVIRONMENT,&
+ call PARALLEL_live_message("H(ibz)",ENVIRONMENT="Non_Linear",&
           LOADED=PAR_IND_Xk_ibz%n_of_elements(PAR_IND_Xk_ibz_ID+1),TOTAL=Xk%nibz,&
           NCPU=PAR_COM_Xk_ibz_INDEX%n_CPU)
  !
@@ -116,7 +115,7 @@ subroutine PARALLEL_global_Non_Linear(E,Xk,q,ENVIRONMENT)
    PAR_IND_QP%element_1D(i_qp)=.TRUE.
    PAR_IND_QP%n_of_elements=PAR_nQP
  enddo
- call PARALLEL_live_message("QPs",ENVIRONMENT=ENVIRONMENT,LOADED=PAR_nQP,TOTAL=QP_n_states)
+ call PARALLEL_live_message("QPs",ENVIRONMENT="Non_Linear",LOADED=PAR_nQP,TOTAL=QP_n_states)
 
  !.........................................................................
  !   "COLLISIONS"
diff --git a/src/parallel/PARALLEL_global_Oscillators.F b/src/parallel/PARALLEL_global_Oscillators.F
new file mode 100644
index 0000000000..badc53f43c
--- /dev/null
+++ b/src/parallel/PARALLEL_global_Oscillators.F
@@ -0,0 +1,110 @@
+!
+!        Copyright (C) 2000-2022 the YAMBO team
+!              http://www.yambo-code.org
+!
+! Authors (see AUTHORS file for details): CA MG DS
+! 
+! This file is distributed under the terms of the GNU 
+! General Public License. You can redistribute it and/or 
+! modify it under the terms of the GNU General Public 
+! License as published by the Free Software Foundation; 
+! either version 2, or (at your option) any later version.
+!
+! This program is distributed in the hope that it will 
+! be useful, but WITHOUT ANY WARRANTY; without even the 
+! implied warranty of MERCHANTABILITY or FITNESS FOR A 
+! PARTICULAR PURPOSE.  See the GNU General Public License 
+! for more details.
+!
+! You should have received a copy of the GNU General Public 
+! License along with this program; if not, write to the Free 
+! Software Foundation, Inc., 59 Temple Place - Suite 330,Boston, 
+! MA 02111-1307, USA or visit http://www.gnu.org/copyleft/gpl.txt.
+!
+subroutine PARALLEL_global_Oscillators(E,Xk,q)
+ !
+ use electrons,     ONLY:levels
+ use R_lattice,     ONLY:bz_samp
+ use collision_ext, ONLY:COLL_bands
+ use IO_int,        ONLY:IO_and_Messaging_switch
+ use parallel_int,  ONLY:PARALLEL_index,PARALLEL_assign_chains_and_COMMs,PARALLEL_live_message
+ use parallel_m,    ONLY:ncpu,COMM_copy,PAR_build_index,PAR_INDEX_copy
+ ! COMMUNICATORS
+ use parallel_m,    ONLY:PAR_COM_G_b_A2A,PAR_COM_Xk_ibz_INDEX,PAR_COM_WF_k_INDEX,PAR_G_bands_INDEX, &
+ &                       PAR_COM_Xk_ibz_A2A,PAR_COM_G_b_INDEX,PAR_n_bands,n_WF_bands_to_load
+ ! IND
+ use parallel_m,    ONLY:PAR_IND_Xk_ibz,PAR_IND_G_b,PAR_IND_WF_k,PAR_IND_Xk_bz
+ ! INDEX
+ use parallel_m,    ONLY:PAR_Xk_ibz_index,PAR_Xk_bz_index
+ ! DIMENSIONS
+ use parallel_m,    ONLY:PAR_nG_bands,PAR_nQP,PAR_Xk_nbz,PAR_Xk_nibz
+ ! ID's
+ use parallel_m,    ONLY:PAR_IND_G_b_ID,PAR_IND_WF_k_ID,&
+&                        PAR_IND_Xk_bz_ID,PAR_IND_Xk_ibz_ID
+ use openmp,        ONLY:n_threads_OSCLL,OPENMP_set_threads
+ use QP_m,          ONLY:QP_n_states,QP_table
+ !
+#include<memory.h>
+ !
+ type(levels)         :: E
+ type(bz_samp)        :: Xk,q
+ !
+ ! Work space
+ !
+ integer      :: nb_mat 
+ !
+ CALL PARALLEL_structure(2,(/"k ","b "/))
+ !
+ call PARALLEL_assign_chains_and_COMMs(2,COMM_index_1=PAR_COM_Xk_ibz_INDEX, &
+&                                        COMM_index_2=PAR_COM_G_b_INDEX,&
+&                                        COMM_A2A_1=PAR_COM_Xk_ibz_A2A)
+ !
+ ! The routine PARALLEL_assign_chains_and_COMMs cannot define COMMUNICATORS for internal
+ ! A2A when there is no internal distribution
+ !
+if (PAR_COM_Xk_ibz_INDEX%n_CPU==1) then
+   call COMM_copy(PAR_COM_Xk_ibz_A2A,PAR_COM_G_b_A2A)
+ endif
+ !
+ ! K-points
+ ! 
+ call PARALLEL_index(PAR_IND_Xk_ibz,(/Xk%nibz/),COMM=PAR_COM_Xk_ibz_INDEX)
+ PAR_IND_Xk_ibz_ID=PAR_COM_Xk_ibz_INDEX%CPU_id
+ PAR_Xk_nibz=PAR_IND_Xk_ibz%n_of_elements(PAR_IND_Xk_ibz_ID+1)
+ call PARALLEL_live_message("K-points",ENVIRONMENT="Oscillators",         &
+&                           LOADED=PAR_IND_Xk_ibz%n_of_elements(PAR_IND_Xk_ibz_ID+1), &
+&                           TOTAL=Xk%nibz,NCPU=PAR_COM_Xk_ibz_INDEX%n_CPU)
+ !
+ YAMBO_ALLOC(PAR_Xk_ibz_index,(Xk%nibz))
+ call PAR_build_index(PAR_IND_Xk_ibz,Xk%nibz,PAR_Xk_ibz_index,PAR_Xk_nibz)
+ !
+ ! Bands
+ !
+ nb_mat=(COLL_bands(2)-COLL_bands(1)+1)**2
+ !
+ n_WF_bands_to_load=PAR_n_bands(2)
+ !
+ call PARALLEL_index(PAR_IND_G_b,(/nb_mat/),COMM=PAR_COM_G_b_INDEX)
+ PAR_IND_G_b_ID=PAR_COM_G_b_INDEX%CPU_id
+ PAR_nG_bands=PAR_IND_G_b%n_of_elements(PAR_IND_G_b_ID+1)
+ YAMBO_ALLOC(PAR_G_bands_INDEX,(nb_mat))
+ call PAR_build_index(PAR_IND_G_b,nb_mat,PAR_G_bands_index,PAR_nG_bands)
+ !
+ call PARALLEL_live_message("OSCL bands",ENVIRONMENT="Oscillators",&
+&                             LOADED=PAR_IND_G_b%n_of_elements(PAR_IND_G_b_ID+1),&
+&                             TOTAL=nb_mat,NCPU=PAR_COM_G_b_INDEX%n_CPU)
+ !
+ ! Copy to WF
+ !
+ call COMM_copy(PAR_COM_Xk_ibz_INDEX,PAR_COM_WF_k_INDEX)
+ PAR_IND_WF_k_ID=PAR_COM_WF_k_INDEX%CPU_id
+ !
+ ! Io and messaging
+ !
+ call IO_and_Messaging_switch("+io_out",CONDITION=PAR_COM_G_b_INDEX%CPU_id==0)
+ !
+ call IO_and_Messaging_switch("+output",CONDITION=PAR_COM_G_b_INDEX%CPU_id==0)
+ !
+ call OPENMP_set_threads(n_threads_in=n_threads_OSCLL) 
+ !
+end subroutine PARALLEL_global_Oscillators
diff --git a/src/parallel/PARALLEL_global_defaults.F b/src/parallel/PARALLEL_global_defaults.F
index e2b9453bf6..3958936fe9 100644
--- a/src/parallel/PARALLEL_global_defaults.F
+++ b/src/parallel/PARALLEL_global_defaults.F
@@ -71,6 +71,8 @@ subroutine PARALLEL_global_defaults(ENVIRONMENT)
      call GIMME_a_parallel_proposal(3,(/"q","k","b"/))
    case("Non_Linear") 
      call GIMME_a_parallel_proposal(2,(/" k"," w"/))
+   case("Oscillators") 
+     call GIMME_a_parallel_proposal(2,(/" k"," b"/))
    case("ScaLapacK") 
      CPU_structure(i_PAR_structure)%ROLE(1)="p"
      CPU_structure(i_PAR_structure)%CPU(1)=ncpu
diff --git a/src/parallel/PARALLEL_global_dimensions.F b/src/parallel/PARALLEL_global_dimensions.F
index 8f94695e7c..f7d7a00a15 100644
--- a/src/parallel/PARALLEL_global_dimensions.F
+++ b/src/parallel/PARALLEL_global_dimensions.F
@@ -154,6 +154,11 @@ subroutine PARALLEL_global_dimensions(E,Xk,q,ENVIRONMENT)
  !-----------
  if (ENVIRONMENT=="Self_Energy"                     ) PAR_QP_range=QP_n_states
  !
+ ! Oscillators
+#if defined _NL || defined _RT
+ if (ENVIRONMENT=="Oscillators"                     ) PAR_n_bands =COLL_bands
+#endif
+ !
 #if defined _NL
  if (ENVIRONMENT=="Non_Linear"                      ) PAR_n_freqs =n_runs
 #endif
diff --git a/src/parallel/PARALLEL_global_indexes.F b/src/parallel/PARALLEL_global_indexes.F
index 7674d792af..a0f597799a 100644
--- a/src/parallel/PARALLEL_global_indexes.F
+++ b/src/parallel/PARALLEL_global_indexes.F
@@ -9,7 +9,7 @@ subroutine PARALLEL_global_indexes(E,Xk,q,ENVIRONMENT,X,Dip,RESET,Dip_limits_pre
  !
  ! LOGICALS
  use parallel_m,    ONLY:l_par_X_T,l_par_RT,l_par_SE,l_par_X_G,l_par_X_G_finite_q,&
-&                        l_par_DIP,l_par_BZINDX,l_par_NL,l_par_SC
+&                        l_par_DIP,l_par_BZINDX,l_par_NL,l_par_SC,l_par_OSCLL
  ! COMMUNICATORS
  use parallel_m,    ONLY:PAR_COM_CON_INDEX_X,PAR_COM_VAL_INDEX_X
  ! IND
@@ -81,6 +81,7 @@ subroutine PARALLEL_global_indexes(E,Xk,q,ENVIRONMENT,X,Dip,RESET,Dip_limits_pre
  l_par_RT           =ENVIRONMENT=="Real_Time"
  l_par_NL           =ENVIRONMENT=="Non_Linear"
  l_par_SC           =ENVIRONMENT=="Self_Energy"
+ l_par_OSCLL        =ENVIRONMENT=="Oscillators"
  !
  !==================================
  ! USER provided PARALLEL structure
@@ -148,7 +149,8 @@ subroutine PARALLEL_global_indexes(E,Xk,q,ENVIRONMENT,X,Dip,RESET,Dip_limits_pre
 #endif
  !
 #if defined _NL
- if (      ENVIRONMENT=="Non_Linear")          call PARALLEL_global_Non_Linear(E,Xk,q,ENVIRONMENT)
+ if (      ENVIRONMENT=="Non_Linear")          call PARALLEL_global_Non_Linear(E,Xk,q)
+ if (      ENVIRONMENT=="Oscillators")         call PARALLEL_global_Oscillators(E,Xk,q)
 #endif
  !
 #if defined _SCALAPACK
diff --git a/src/setup/PARALLEL_and_IO_Setup.F b/src/setup/PARALLEL_and_IO_Setup.F
index 8ab4a2b090..bc8c4931f9 100644
--- a/src/setup/PARALLEL_and_IO_Setup.F
+++ b/src/setup/PARALLEL_and_IO_Setup.F
@@ -22,7 +22,7 @@ subroutine PARALLEL_and_IO_Setup(en,k)
  use parallel_int,   ONLY:PP_bcast,PP_redux_wait 
  use parser_m,       ONLY:parser
  use wave_func,      ONLY:WF_buffered_IO
- use openmp,         ONLY:n_threads_X,n_threads_SE,n_threads_RT,n_threads_DIP,n_threads_NL,n_threads
+ use openmp,         ONLY:n_threads_X,n_threads_SE,n_threads_RT,n_threads_DIP,n_threads_NL,n_threads,n_threads_OSCLL
  use memory,         ONLY:USER_MEM_limit_string
  use LIVE_t,         ONLY:USER_wall_time_string
  use cuda_m,         ONLY:cuda_visible_devices,have_cuda_devices,cuda_gpu_subscription
@@ -84,7 +84,7 @@ subroutine PARALLEL_and_IO_Setup(en,k)
  ! CPU structure REPORT
  !======================
  !
- n_max_threads=maxval((/n_threads,n_threads_X,n_threads_SE,n_threads_RT,n_threads_DIP,n_threads_NL/))
+ n_max_threads=maxval((/n_threads,n_threads_X,n_threads_SE,n_threads_RT,n_threads_DIP,n_threads_NL,n_threads_OSCLL/))
  !
  if (ncpu>1.or.n_max_threads>1) then
    !
diff --git a/src/tddft/TDDFT_ALDA_R_space.F b/src/tddft/TDDFT_ALDA_R_space.F
deleted file mode 100644
index b1da0c3f50..0000000000
--- a/src/tddft/TDDFT_ALDA_R_space.F
+++ /dev/null
@@ -1,138 +0,0 @@
-!
-! License-Identifier: GPL
-!
-! Copyright (C) 2015 The Yambo Team
-!
-! Authors (see AUTHORS file for details): AM
-!
-function TDDFT_ALDA_R_space(is,os,isp,osp,tddft_wf,mode)
- !
- ! Calculates the F_xc scattering 
- !
- ! mode 1 
- !
- !  (ic(1),ik(1),is(1)) --<--:...:--<-- (ic(2),ik(2),is(2))
- !                              :Fxc:
- !  (iv(1),ik(3),is(3)) -->--:...:-->-- (iv(2),ik(4),is(4))
- !
- ! mode 2
- !
- !  (ic(1),ik(1),is(1)) --<--:...:--<-- (iv(2),ik(2),is(2))
- !                              :Fxc:
- !  (iv(1),ik(3),is(3)) -->--:...:-->-- (ic(2),ik(4),is(4))
- !
- use pars,           ONLY:SP,pi,cZERO
- use FFT_m,          ONLY:fft_size
- use xc_functionals, ONLY:F_xc_mat
- use BS,             ONLY:l_BS_magnons
- use wrapper_omp,    ONLY:V_dot_V_omp
- use electrons,      ONLY:n_spinor,n_sp_pol,n_spin
- use timing_m,       ONLY:timing
- !use interfaces,     ONLY:WF_apply_symm
- use TDDFT,          ONLY:tddft_wf_t
- !
- implicit none
- !
- complex(SP) :: TDDFT_ALDA_R_space
- !
- integer,          intent(in)  :: is(4),os(4),isp(4),osp(4)
- character(3),     intent(in)  :: mode
- type(tddft_wf_t), target, intent(inout) :: tddft_wf 
- ! 
- ! Work Space
- !
- integer     :: i_spinor,j_spinor,ip_spinor,jp_spinor,ifft,irhotw,is_yambo,os_yambo,i_spinor_y,j_spinor_y
- !
- complex(SP), pointer :: rhotwr1_p(:)
- complex(SP), pointer :: rhotwr2_p(:)
- complex(SP), pointer :: WF_symm1_p(:,:)
- complex(SP), pointer :: WF_symm2_p(:,:)
- !
- call timing('T_space ALDA scatt.',OPR='start')
- !
- if ((is(4)/=os(4).or.isp(4)/=osp(4)).and.((.not.l_BS_magnons).or. n_sp_pol==1)) call error(" TDDFT_ALDA_R_space: wrong spin index")
- if ((is(4)==os(4).or.isp(4)==osp(4)).and.((     l_BS_magnons).and.n_sp_pol==2)) call error(" TDDFT_ALDA_R_space: wrong spin index")
- if (mode/="RES" .and. mode/="CPL" ) call error(" TDDFT_ALDA_R_space: unkown mode")
- !
- TDDFT_ALDA_R_space=cZERO
- !
- WF_symm1_p => tddft_wf%WF_symm1
- WF_symm2_p => tddft_wf%WF_symm2
- rhotwr1_p  => tddft_wf%rhotwr1
- rhotwr2_p  => tddft_wf%rhotwr2
- !
- ! Use pointers both for CUDA and to avoid continuous allocation and de-allocation
- !
- !
- call WF_apply_symm_cpu(is,WF_symm1_p)
- call WF_apply_symm_cpu(os,WF_symm2_p)
- !
- if(n_spinor==1) rhotwr1_p(:)=conjg(WF_symm1_p(:,1))*WF_symm2_p(:,1)
- !
- if(n_spinor==2) then
-   do i_spinor=1,n_spinor
-     do j_spinor=1,n_spinor
-       do ifft=1,fft_size
-         irhotw=ifft+(i_spinor-1)*fft_size+(j_spinor-1)*n_spinor*fft_size
-         rhotwr1_p(irhotw)=conjg(WF_symm1_p(ifft,i_spinor))*WF_symm2_p(ifft,j_spinor)
-       enddo
-     enddo
-   enddo
- endif
- !
- if (mode=="RES") then
-   call WF_apply_symm_cpu(isp,WF_symm1_p) 
-   call WF_apply_symm_cpu(osp,WF_symm2_p)
- else if (mode=="CPL") then
-   call WF_apply_symm_cpu(isp,WF_symm2_p)
-   call WF_apply_symm_cpu(osp,WF_symm1_p)
- endif
- !
- if( n_spin==1 ) rhotwr2_p(:)=F_xc_mat(:,1,1,1,1)*WF_symm1_p(:,1)*conjg(WF_symm2_p(:,1))
- !
- if(n_sp_pol==2) then
-   ! This takes into account the different conventions between the definition of the spin indexes
-   ! between yambo and the derivation of fxc in spin space for magnons
-   ! This happens because in the yambo convention the left indexes "v sigma_v, c sigma_c, k" are inverted
-   ! with respect to the standard convention, i.e. "c sigma_c, v sigma_v, k"
-   ! As a result f_\up\dn,\dn\up is in yambo f_\dn\up,\dn\up and so on
-   is_yambo=os(4)
-   os_yambo=is(4)
-   ! To check/fix the spinorial version 
-   rhotwr2_p(:)=F_xc_mat(:,is_yambo,os_yambo,isp(4),osp(4))*WF_symm1_p(:,1)*conjg(WF_symm2_p(:,1))
- endif
- !
- if(n_spinor==2) then
-   rhotwr2_p=cZERO
-   do i_spinor=1,n_spinor
-     do j_spinor=1,n_spinor
-       i_spinor_y=j_spinor
-       j_spinor_y=i_spinor
-       do ip_spinor=1,n_spinor
-         do jp_spinor=1,n_spinor
-           do ifft=1,fft_size
-             irhotw=ifft+(i_spinor-1)*fft_size+(j_spinor-1)*n_spinor*fft_size
-             rhotwr2_p(irhotw)=rhotwr2_p(irhotw)+                     &
-             &  F_xc_mat(ifft,i_spinor_y,j_spinor_y,ip_spinor,jp_spinor)* &
-             &  WF_symm1_p(ifft,ip_spinor)*conjg(WF_symm2_p(ifft,jp_spinor))
-           enddo
-         enddo
-       enddo
-     enddo
-   enddo
- endif
- !
- ! SUM
- !=====
- TDDFT_ALDA_R_space=V_dot_V_omp(fft_size*n_spinor*n_spinor,rhotwr1_p,rhotwr2_p)
- !
- ! tddft_alda_r_space should be mutiplied by X, it is mutiplied by Co in K
- ! X  = fft_size*spin_occ/DL_vol/Nq 
- ! Co =     4*pi*spin_occ/DL_vol/Nq
- ! --> X/Co = fft_size/4/pi
- !
- TDDFT_ALDA_R_space=TDDFT_ALDA_R_space*real(fft_size,SP)/4._SP/pi
- !
- call timing('T_space ALDA scatt.',OPR='stop')
- !
-end function
diff --git a/src/tddft/TDDFT_ALDA_eh_space_G_collisions_L.F b/src/tddft/TDDFT_ALDA_eh_space_G_collisions_L.F
index fe6d5270c3..3ce0e7f513 100644
--- a/src/tddft/TDDFT_ALDA_eh_space_G_collisions_L.F
+++ b/src/tddft/TDDFT_ALDA_eh_space_G_collisions_L.F
@@ -7,7 +7,7 @@
 !
 #include<dev_defs.h>
 !
-subroutine TDDFT_ALDA_eh_space_G_collisions_L(iq,Xk,i_T_grp,tddft_wf,mode)
+subroutine TDDFT_ALDA_eh_space_G_collisions_L(iq,Xk,i_T_grp,NG,l_bs_tddft_wf_in_loop,tddft_wf,mode)
  !
  ! Calculates the F_xc scattering 
  !
@@ -16,6 +16,8 @@ subroutine TDDFT_ALDA_eh_space_G_collisions_L(iq,Xk,i_T_grp,tddft_wf,mode)
  !
  use pars,           ONLY:SP,DP,pi,cZERO
  use FFT_m,          ONLY:fft_size
+ use wave_func,      ONLY:WF
+ use interfaces,     ONLY:WF_load,WF_free
  use R_lattice,      ONLY:qindx_X,bz_samp,minus_G
  use BS,             ONLY:l_BS_magnons,BS_T_grp,BS_bands
  use electrons,      ONLY:n_spinor,n_sp_pol,n_spin
@@ -27,12 +29,14 @@ subroutine TDDFT_ALDA_eh_space_G_collisions_L(iq,Xk,i_T_grp,tddft_wf,mode)
  implicit none
  !
  type(bz_samp),intent(in) :: Xk
- integer,      intent(in) :: iq,i_T_grp
+ integer,      intent(in) :: iq,i_T_grp,NG(2)
+ logical,      intent(in) :: l_bs_tddft_wf_in_loop
  character(1), intent(in) :: mode
  type(tddft_wf_t), target, intent(inout) :: tddft_wf 
  ! 
  ! Work Space
  !
+ logical     :: l_load_WFs
  integer     :: i_spinor,j_spinor,ip_spinor,jp_spinor,ifft,irhotw,is_yambo,os_yambo,i_spinor_y,j_spinor_y
  integer     :: i_T_el,i_T_el_p,N_T_el_p,i_c,i_v,i_sp_c,i_sp_v,i_k_bz,i_k,i_s,i_g0,i_g1,i_g2,i_p_bz,i_g_p,i_p,&
 &               i_sp,i_T_grp_p,is(4),os(4),qs(3),NK(2)
@@ -51,8 +55,6 @@ subroutine TDDFT_ALDA_eh_space_G_collisions_L(iq,Xk,i_T_grp,tddft_wf,mode)
  WF_symm2_p=cZERO
  rhotwr_p=  cZERO
  !
- ! Use pointers both for CUDA and to avoid continuous allocation and de-allocation
- !
 #if defined(__NOTNOW) && ! defined(_CUDA)
  !$omp parallel default(shared), private( K_EXCH_collision, &
  !$omp &        i_T_el,i_k_bz,i_k,i_s, i_p_bz,i_p,i_sp, i_v,i_c,i_sp_c,i_sp_v, &
@@ -96,6 +98,20 @@ subroutine TDDFT_ALDA_eh_space_G_collisions_L(iq,Xk,i_T_grp,tddft_wf,mode)
      os(1)=i_c
    endif
    !
+   l_load_WFs= l_bs_tddft_wf_in_loop .and. (NK(1)/=min(i_k,i_p).or.NK(2)/=max(i_k,i_p))
+   if (l_load_WFs) then
+#if defined(__NOTNOW) && ! defined(_CUDA)
+     !$omp critical
+#endif
+     if (NK(2)/=-1) call WF_free(WF,keep_fft=.true.,keep_states_to_load=.true.)
+     NK=(/min(i_k,i_p),max(i_k,i_p)/)
+     call WF_load(WF,NG(1),NG(2),BS_bands,NK,k_extrema_only=.true.,quiet=.true.,&
+     &            space='R',title="Kernel exch",keep_states_to_load=.true.)
+#if defined(__NOTNOW) && ! defined(_CUDA)
+     !$omp end critical
+#endif
+   endif
+   !
    if ( (is(4)/=os(4)) .and. ((.not.l_BS_magnons).or. n_sp_pol==1) ) call error(" TDDFT_ALDA_R_space: wrong spin index")
    if ( (is(4)==os(4)) .and. ((     l_BS_magnons).and.n_sp_pol==2) ) call error(" TDDFT_ALDA_R_space: wrong spin index")
    !
@@ -113,7 +129,7 @@ subroutine TDDFT_ALDA_eh_space_G_collisions_L(iq,Xk,i_T_grp,tddft_wf,mode)
        do j_spinor=1,n_spinor
          !
 	 do ifft=1,fft_size
-           rhotwr_p(ifft)=cmplx(WF_symm1_p(ifft,i_spinor)*conjg(WF_symm2_p(ifft,j_spinor)),kind=DP)
+	   rhotwr_p(ifft)=cmplx(WF_symm1_p(ifft,i_spinor)*conjg(WF_symm2_p(ifft,j_spinor)),kind=DP)
          enddo
          call perform_fft_3d(qs,rhotwr_p,BS_T_grp(i_T_grp)%O_tddft_L(:,i_T_el,i_spinor,j_spinor))
          !
@@ -123,6 +139,8 @@ subroutine TDDFT_ALDA_eh_space_G_collisions_L(iq,Xk,i_T_grp,tddft_wf,mode)
    !
  enddo
  !
+ if(l_bs_tddft_wf_in_loop) call WF_free(WF,keep_fft=.true.,keep_states_to_load=.true.)
+ !
  nullify(WF_symm1_p)
  nullify(WF_symm2_p)
  nullify(rhotwr_p)
@@ -202,31 +220,3 @@ subroutine perform_fft_3d(qs,rhotw,O_x)
 #endif
  !
 end subroutine perform_fft_3d
-!
-!
-subroutine TDDFT_oscillators_alloc_L(iGL)
- use electrons, ONLY:n_spin,n_spinor
- use BS,        ONLY:BS_T_grp,BS_n_g_fxc
-#include<memory.h>
- integer, intent(in) :: iGL
- if (BS_T_grp(iGL)%size>0) then
-   YAMBO_ALLOC(BS_T_grp(iGL)%O_tddft_L,(BS_n_g_fxc,BS_T_grp(iGL)%size,n_spinor,n_spinor))
-#ifdef _CUDA
-   allocate(BS_T_grp(iGL)%O_tddft_L_d,mold=BS_T_grp(iGL)%O_tddft_L)
-#endif
- endif
-end subroutine TDDFT_oscillators_alloc_L
-!
-!
-subroutine TDDFT_oscillators_free_L(iG_ref)
- use BS,        ONLY:BS_T_grp
-#include<memory.h>
- integer, intent(in) :: iG_ref
- integer :: iGL
- do iGL=iG_ref,1,-1
-   YAMBO_FREE(BS_T_grp(iGL)%O_tddft_L)
-#ifdef _CUDA
-   if (allocated(BS_T_grp(iGL)%O_tddft_L_d)) deallocate(BS_T_grp(iGL)%O_tddft_L_d)
-#endif
- enddo
-end subroutine TDDFT_oscillators_free_L
diff --git a/src/tddft/TDDFT_ALDA_eh_space_G_collisions_R.F b/src/tddft/TDDFT_ALDA_eh_space_G_collisions_R.F
index 10a0677f88..0c815df0e5 100644
--- a/src/tddft/TDDFT_ALDA_eh_space_G_collisions_R.F
+++ b/src/tddft/TDDFT_ALDA_eh_space_G_collisions_R.F
@@ -7,7 +7,7 @@
 !
 #include<dev_defs.h>
 !
-subroutine TDDFT_ALDA_eh_space_G_collisions_R(iq,Xk,i_T_grp,tddft_wf,mode)
+subroutine TDDFT_ALDA_eh_space_G_collisions_R(iq,Xk,i_T_grp,NG,l_bs_tddft_wf_in_loop,tddft_wf,mode)
  !
  ! Calculates the F_xc scattering 
  !
@@ -16,7 +16,9 @@ subroutine TDDFT_ALDA_eh_space_G_collisions_R(iq,Xk,i_T_grp,tddft_wf,mode)
  !
  use pars,           ONLY:SP,DP,pi,cZERO
  use FFT_m,          ONLY:fft_size
+ use wave_func,      ONLY:WF
  use xc_functionals, ONLY:F_xc_mat
+ use interfaces,     ONLY:WF_load,WF_free
  use R_lattice,      ONLY:qindx_X,bz_samp,minus_G
  use BS,             ONLY:l_BS_magnons,BS_T_grp,BS_bands
  use electrons,      ONLY:n_spinor,n_sp_pol,n_spin
@@ -28,20 +30,26 @@ subroutine TDDFT_ALDA_eh_space_G_collisions_R(iq,Xk,i_T_grp,tddft_wf,mode)
  implicit none
  !
  type(bz_samp),intent(in) :: Xk
- integer,      intent(in) :: iq,i_T_grp
+ integer,      intent(in) :: iq,i_T_grp,NG(2)
+ logical,      intent(in) :: l_bs_tddft_wf_in_loop
  character(1), intent(in) :: mode
  type(tddft_wf_t), target, intent(inout) :: tddft_wf 
  ! 
  ! Work Space
  !
+ logical     :: l_load_WFs
  integer     :: i_spinor,j_spinor,ip_spinor,jp_spinor,ifft,irhotw,is_yambo,os_yambo,i_spinor_y,j_spinor_y,&
 &               i_sp_loop,o_sp_loop
  integer     :: i_T_el,i_T_el_p,N_T_el_p,i_c,i_v,i_sp_c,i_sp_v,i_k_bz,i_k,i_s,i_g0,i_g1,i_g2,i_p_bz,i_g_p,i_p,&
 &               i_sp,i_T_grp_p,is(4),os(4),qs(3),NK(2)
  !
- complex(DP), pointer DEV_ATTR :: rhotwr_p(:)
- complex(SP), pointer DEV_ATTR :: WF_symm1_p(:,:)
- complex(SP), pointer DEV_ATTR :: WF_symm2_p(:,:)
+ complex(DP), pointer :: rhotwr_p(:)
+ complex(SP), pointer :: WF_symm1_p(:,:)
+ complex(SP), pointer :: WF_symm2_p(:,:)
+ !
+#if defined _CUDA
+ call error("TDDFT ALDA Osc. not GPU ported")
+#endif
  !
  call timing('T_space ALDA Osc.',OPR='start')
  !
@@ -100,6 +108,20 @@ subroutine TDDFT_ALDA_eh_space_G_collisions_R(iq,Xk,i_T_grp,tddft_wf,mode)
      os(1)=i_c
    endif
    !
+   l_load_WFs= l_bs_tddft_wf_in_loop .and. (NK(1)/=min(i_k,i_p).or.NK(2)/=max(i_k,i_p))
+   if (l_load_WFs) then
+#if defined(__NOTNOW) && ! defined(_CUDA)
+     !$omp critical
+#endif
+     if (NK(2)/=-1) call WF_free(WF,keep_fft=.true.,keep_states_to_load=.true.)
+     NK=(/min(i_k,i_p),max(i_k,i_p)/)
+     call WF_load(WF,NG(1),NG(2),BS_bands,NK,k_extrema_only=.true.,quiet=.true.,&
+     &            space='R',title="Kernel exch",keep_states_to_load=.true.)
+#if defined(__NOTNOW) && ! defined(_CUDA)
+     !$omp end critical
+#endif
+   endif
+   !
    if ( (is(4)/=os(4)) .and. ((.not.l_BS_magnons).or. n_sp_pol==1) ) call error(" TDDFT_ALDA_R_space: wrong spin index")
    if ( (is(4)==os(4)) .and. ((     l_BS_magnons).and.n_sp_pol==2) ) call error(" TDDFT_ALDA_R_space: wrong spin index")
    !
@@ -126,7 +148,7 @@ subroutine TDDFT_ALDA_eh_space_G_collisions_R(iq,Xk,i_T_grp,tddft_wf,mode)
          os_yambo=i_sp_loop
          ! To check/fix the spinorial version 
          rhotwr_p(:)=cmplx(F_xc_mat(:,is_yambo,os_yambo,is(4),os(4))* &
-         &              WF_symm1_p(:,1)*conjg(WF_symm2_p(:,1)),kind=DP)
+         &                                             WF_symm1_p(:,1)*conjg(WF_symm2_p(:,1)),kind=DP)
          call perform_fft_3d(qs,rhotwr_p,BS_T_grp(i_T_grp)%O_tddft_R(:,i_T_el,i_sp_loop,o_sp_loop))
          !
        enddo
@@ -157,6 +179,8 @@ subroutine TDDFT_ALDA_eh_space_G_collisions_R(iq,Xk,i_T_grp,tddft_wf,mode)
    !
  enddo
  !
+ if(l_bs_tddft_wf_in_loop) call WF_free(WF,keep_fft=.true.,keep_states_to_load=.true.)
+ !
  nullify(WF_symm1_p)
  nullify(WF_symm2_p)
  nullify(rhotwr_p)
@@ -164,31 +188,3 @@ subroutine TDDFT_ALDA_eh_space_G_collisions_R(iq,Xk,i_T_grp,tddft_wf,mode)
  call timing('T_space ALDA Osc.',OPR='stop')
  !
 end subroutine TDDFT_ALDA_eh_space_G_collisions_R
-!
-!
-subroutine TDDFT_oscillators_alloc_R(iGR)
- use electrons, ONLY:n_spin,n_spinor
- use BS,        ONLY:BS_T_grp,BS_n_g_fxc
-#include<memory.h>
- integer, intent(in) :: iGR
- if (BS_T_grp(iGR)%size>0) then
-   YAMBO_ALLOC(BS_T_grp(iGR)%O_tddft_R,(BS_n_g_fxc,BS_T_grp(iGR)%size,n_spin,n_spin))
-#ifdef _CUDA
-   allocate(BS_T_grp(iGR)%O_tddft_R_d,mold=BS_T_grp(iGR)%O_tddft_R)
-#endif
- endif
-end subroutine TDDFT_oscillators_alloc_R
-!
-!
-subroutine TDDFT_oscillators_free_R(iG_ref)
- use BS,        ONLY:BS_T_grp
-#include<memory.h>
- integer, intent(in) :: iG_ref
- integer :: iGR
- do iGR=iG_ref,1,-1
-   YAMBO_FREE(BS_T_grp(iGR)%O_tddft_R)
-#ifdef _CUDA
-   if (allocated(BS_T_grp(iGR)%O_tddft_R_d)) deallocate(BS_T_grp(iGR)%O_tddft_R_d)
-#endif
- enddo
-end subroutine TDDFT_oscillators_free_R
diff --git a/src/wf_and_fft/WF_alloc.F b/src/wf_and_fft/WF_alloc.F
index f20538073a..99c8994d47 100644
--- a/src/wf_and_fft/WF_alloc.F
+++ b/src/wf_and_fft/WF_alloc.F
@@ -5,7 +5,7 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
-subroutine WF_alloc(WF)
+subroutine WF_alloc(WF,k_extrema_only)
  !
  use pars,          ONLY:cZERO
  use wave_func,     ONLY:WAVEs,states_to_load,wf_ng,wf_ncx
@@ -17,50 +17,57 @@ subroutine WF_alloc(WF)
 #include<memory.h>
  !
  type(WAVEs)           :: WF
+ logical, intent(in)   :: k_extrema_only
  !
  ! Work Space
  !
- integer :: ik,ib,i_sp_pol,N_total,wf_grid_size
+ integer :: ik,ib,i_sp_pol,NK,N_loaded,N_total,wf_grid_size
  !
  ! Distributed allocation
  !
  if(allocated(states_to_load)) then
    !
    WF%N=0
+   N_loaded=0
    N_total=0
    do i_sp_pol=WF%sp_pol(1),WF%sp_pol(2)
      do ik=WF%k(1),WF%k(2)
        do ib=WF%b(1),WF%b(2)
          N_total=N_total+1
-         if (states_to_load(ib,ik,i_sp_pol)) WF%N=WF%N+1
+         if (states_to_load(ib,ik,i_sp_pol)) then
+           N_loaded=N_loaded+1
+           if (.not.k_extrema_only) WF%N=WF%N+1
+           if (     k_extrema_only.and.(ik==WF%k(1).or.ik==WF%k(2))) WF%N=WF%N+1
+	 endif
        enddo
      enddo
    enddo
    !
  else
    !
-   WF%N=(WF%b(2)-WF%b(1)+1)*(WF%k(2)-WF%k(1)+1)*(WF%sp_pol(2)-WF%sp_pol(1)+1)
+   NK=(WF%k(2)-WF%k(1)+1)
+   if (k_extrema_only) NK=min(NK,2)
+   WF%N=(WF%b(2)-WF%b(1)+1)*NK*(WF%sp_pol(2)-WF%sp_pol(1)+1)
+   N_loaded=WF%N
    N_total=WF%N
    !
  endif
  !
+ call PARALLEL_live_message("Wave-Function states",LOADED=N_loaded,TOTAL=N_total)
+ !
  if (WF%space=='R')  wf_grid_size=fft_size
  if (WF%space=='G')  wf_grid_size=wf_ng
  if (WF%space=='C')  wf_grid_size=wf_ncx
  if (WF%space=='B')  wf_grid_size=wf_ncx
  !
  YAMBO_ALLOC(WF%c,(wf_grid_size,n_spinor,WF%N))
+ WF%c=cZERO
  if (have_cuda) then
    YAMBO_ALLOC_MOLD(WF%c_d,WF%c)
+   WF%c_d=cZERO
  endif
  !
- call PARALLEL_live_message("Wave-Function states",LOADED=WF%N,TOTAL=N_total)
- !
  YAMBO_ALLOC(WF%index,(WF%b(2),WF%k(2),WF%sp_pol(2)))
- !
- WF%c=cZERO
- if (have_cuda) WF%c_d=cZERO
- !
  WF%index=0
  !
 end subroutine
diff --git a/src/wf_and_fft/WF_free.F b/src/wf_and_fft/WF_free.F
index 2f6877cf88..c5c19a0ae3 100644
--- a/src/wf_and_fft/WF_free.F
+++ b/src/wf_and_fft/WF_free.F
@@ -5,7 +5,7 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
-subroutine WF_free(WF,keep_fft)
+subroutine WF_free(WF,keep_fft,keep_states_to_load)
  !
  use wave_func,  ONLY:WAVEs
  use pseudo,     ONLY:pp_is_uspp,PP_uspp_free
@@ -15,14 +15,19 @@ subroutine WF_free(WF,keep_fft)
  !
  type(WAVEs)       :: WF
  logical, optional :: keep_fft 
+ logical, optional :: keep_states_to_load
  !
  ! Work-space
  !
  logical           :: keep_fft_
+ logical           :: keep_states_to_load_
  !
  keep_fft_=.false.
  if (present(keep_fft)) keep_fft_=keep_fft
  !
+ keep_states_to_load_=.false.
+ if (present(keep_states_to_load)) keep_states_to_load_=keep_states_to_load
+ !
  ! USPP
  if (pp_is_uspp) call PP_uspp_free()
  !
@@ -44,6 +49,9 @@ subroutine WF_free(WF,keep_fft)
  WF%b=0
  WF%sp_pol=0
  WF%space=' '
+ !
+ if (keep_states_to_load_) return
+ !
  WF%to_load=.TRUE.
  !
 end subroutine
diff --git a/src/wf_and_fft/WF_load.F b/src/wf_and_fft/WF_load.F
index 0f42af3ce4..52e954fb15 100644
--- a/src/wf_and_fft/WF_load.F
+++ b/src/wf_and_fft/WF_load.F
@@ -11,6 +11,7 @@
 !! @param[in]       iGo_max_in                  ???
 !! @param[in]       bands_to_load               bands range 
 !! @param[in]       kpts_to_load                k-points range 
+!! @param[in]       k_extrema_only              k-points, load only !first and last in range
 !! @param[in]       sp_pol_to_load              spin-polarization range 
 !! @param[in]       space                       R = real-space,  G = G-space, C = ?, B = ?
 !! @param[in]       title                       purpose of the wave-function loading
@@ -21,12 +22,12 @@
 !!
 !! @param[out]      WF                  Wave-function object
 !
-subroutine WF_load(WF,iG_in,iGo_max_in,bands_to_load,kpts_to_load,sp_pol_to_load,&
+subroutine WF_load(WF,iG_in,iGo_max_in,bands_to_load,kpts_to_load,sp_pol_to_load,k_extrema_only,&
 &                  space,title,impose_free_and_alloc,force_WFo,keep_states_to_load,quiet)
  !
  ! Load and (eventually) FFTs the wavefunctions
  !
- use pars,          ONLY:SP,DP,schlen
+ use pars,          ONLY:SP,DP,schlen,cZERO
  use com,           ONLY:msg,core_io_path,fat_log
  use vec_operate,   ONLY:c2a
  use stderr,        ONLY:intc
@@ -58,6 +59,7 @@ subroutine WF_load(WF,iG_in,iGo_max_in,bands_to_load,kpts_to_load,sp_pol_to_load
  integer,     optional :: sp_pol_to_load(2)
  character(*),optional :: space
  character(*),optional :: title
+ logical,     optional :: k_extrema_only
  logical,     optional :: impose_free_and_alloc
  logical,     optional :: force_WFo
  logical,     optional :: keep_states_to_load
@@ -69,16 +71,18 @@ subroutine WF_load(WF,iG_in,iGo_max_in,bands_to_load,kpts_to_load,sp_pol_to_load
  character(schlen)::wf_title
  integer          ::i_sp_pol,ikibz,ifrag,ib_grp,ib_to_load,ib,ib1,ib2,i_spinor,ic,ig,igfft,&
 &                   iG_max,iG_bounds_loaded(2),iGo_max,sp_pol_to_load_(2),wf_grid_size,&
-&                   npwk
+&                   npwk,N
  real(SP)         ::mndp,mxdp,xk(3)
  complex(SP)      ::c
- logical          ::loaded_bounds_ok,free_and_alloc,buffer_is_ok,clean_up_states,force_WFo_,QUIET_wf,SIZE_msg
+ logical          ::loaded_bounds_ok,free_and_alloc,buffer_is_ok,clean_up_states,&
+                    k_extrema_only_,force_WFo_,QUIET_wf,SIZE_msg
  complex(SP), allocatable :: wf_disk(:,:,:)
  complex(DP), allocatable :: wf_DP(:)
  !
  complex(DP), allocatable DEV_ATTR :: wf_DP_d(:)
  complex(SP), allocatable DEV_ATTR :: wf_disk_d(:,:,:)
- complex(SP), pointer     DEV_ATTR :: wfc_d(:,:,:)
+ complex(SP),     pointer DEV_ATTR :: wfc_d(:,:,:)
+ !
  integer :: WFN
  !
  ! I/O
@@ -104,6 +108,9 @@ subroutine WF_load(WF,iG_in,iGo_max_in,bands_to_load,kpts_to_load,sp_pol_to_load
  force_WFo_=.false.
  if(present(force_WFo)) force_WFo_=force_WFo
  !
+ k_extrema_only_=.false.
+ if(present(k_extrema_only)) k_extrema_only_=k_extrema_only
+ !
  sp_pol_to_load_=(/1,n_sp_pol/)
  if(present(sp_pol_to_load)) sp_pol_to_load_=sp_pol_to_load
  !
@@ -182,6 +189,7 @@ subroutine WF_load(WF,iG_in,iGo_max_in,bands_to_load,kpts_to_load,sp_pol_to_load
  !
  QUIET_wf = .false.
  if (kpts_to_load(1)==kpts_to_load(2).and.kpts_to_load(2)>1) QUIET_wf = .true.
+ if (k_extrema_only_.and.kpts_to_load(1)>1)                  QUIET_wf = .true.
  if (present(quiet))                                         QUIET_wf = quiet
  if (QUIET_wf) call IO_and_Messaging_switch("-report -log")
  SIZE_msg=.not.QUIET_wf.and.(any((/fft_dim_loaded/=fft_dim/)).or.any((/fft_dim==0/)))
@@ -255,7 +263,7 @@ subroutine WF_load(WF,iG_in,iGo_max_in,bands_to_load,kpts_to_load,sp_pol_to_load
  !
  ! Memory allocation
  !
- call WF_alloc(WF)
+ call WF_alloc(WF,k_extrema_only_)
  !
  if (WF%space=='R')  wf_grid_size=fft_size
  if (WF%space=='G')  wf_grid_size=wf_ng
@@ -304,6 +312,7 @@ subroutine WF_load(WF,iG_in,iGo_max_in,bands_to_load,kpts_to_load,sp_pol_to_load
    do ikibz=1,nkibz
      !
      if (any( (/ikibz<WF%k(1), ikibz>WF%k(2)/) )) cycle
+     if (k_extrema_only_ .and. all( (/ikibz>WF%k(1), ikibz<WF%k(2)/) )) cycle
      !
      ! USPP init
      !
@@ -401,7 +410,7 @@ subroutine WF_load(WF,iG_in,iGo_max_in,bands_to_load,kpts_to_load,sp_pol_to_load
               !
               WFN = WF%N
               !$cuf kernel do(1)
-              do ic=1,wf_grid_size
+              do ic=1,fft_size
                 wfc_d(ic,i_spinor,WFN)=cmplx(wf_DP_d(ic),kind=SP)*fft_norm
               enddo
               !
@@ -552,12 +561,13 @@ subroutine WF_load(WF,iG_in,iGo_max_in,bands_to_load,kpts_to_load,sp_pol_to_load
  !
  mndp=10._SP
  mxdp=-1._SP
+ N=size(WF%c(:,:,WF%index(1,1,1)))
  do ib1=1,min(int(nel)+5,WF%b(2))
    do ib2=1,min(int(nel)+5,WF%b(2))
      !
      if ( WF%index(ib1,1,1)==0 .or. WF%index(ib2,1,1)==0 ) cycle
      !
-     c=Vstar_dot_V( size(WF%c(:,:,WF%index(1,1,1))),  WF%c(:,:,WF%index(ib1,1,1) ) , WF%c(:,:,WF%index(ib2,1,1)) )
+     c=Vstar_dot_V( N, WF%c(:,:,WF%index(ib1,1,1) ) , WF%c(:,:,WF%index(ib2,1,1)) )
      !
      if (abs(c)>mxdp) mxdp=abs(c)
      if (abs(c)<mndp) mndp=abs(c)