From 4fb7b9e3a09342864a48873b76e50d8dab4b766d Mon Sep 17 00:00:00 2001 From: Thomas Ponweiser Date: Thu, 21 Aug 2014 15:31:30 +0200 Subject: [PATCH] added utilty_zgemmm; refs #7 --- src/postw90/get_oper.F90 | 29 +++++-------------- src/utility.F90 | 60 +++++++++++++++++++++++++++++++++++++++- 2 files changed, 66 insertions(+), 23 deletions(-) diff --git a/src/postw90/get_oper.F90 b/src/postw90/get_oper.F90 index f54a0f1..cb86957 100644 --- a/src/postw90/get_oper.F90 +++ b/src/postw90/get_oper.F90 @@ -1629,38 +1629,23 @@ subroutine get_gauge_overlap_matrix(ik_a, ns_a, ik_b, ns_b, S_o, S, H) use w90_constants, only : dp, cmplx_0 use w90_postw90_common, only : v_matrix use w90_parameters, only : num_wann, eigval + use w90_utility, only : utility_zgemmm integer, intent(in) :: ik_a, ns_a, ik_b, ns_b complex(kind=dp), dimension(:,:), intent(in) :: S_o - complex(kind=dp), dimension(:,:), intent(out) :: S - complex(kind=dp), dimension(:,:), intent(out), optional :: H + complex(kind=dp), dimension(:,:), intent(out), optional :: S, H - complex(kind=dp), dimension(:,:), allocatable :: tmp - - integer :: wm_a, wm_b, i, j + integer :: wm_a, wm_b call get_win_min(ik_a, wm_a) call get_win_min(ik_b, wm_b) - allocate(tmp(ns_a,num_wann)) - - call gemm(S_o(wm_a:wm_a+ns_a-1, wm_b:wm_b+ns_b-1), & - v_matrix(1:ns_b, 1:num_wann, ik_b), & - tmp, 'N', 'N') + call utility_zgemmm(v_matrix(1:ns_a, 1:num_wann, ik_a), 'C', & + S_o(wm_a:wm_a+ns_a-1, wm_b:wm_b+ns_b-1), 'N', & + v_matrix(1:ns_b, 1:num_wann, ik_b), 'N', & + S, eigval(:,ik_a), H) - call gemm(v_matrix(1:ns_a, 1:num_wann, ik_a), & - tmp, & - S, 'C', 'N') - - if(present(H)) then - forall(i=1:ns_a, j=1:num_wann) - tmp(i,j) = tmp(i,j) * eigval(i,ik_a) - end forall - call gemm(v_matrix(1:ns_a, 1:num_wann, ik_a), & - tmp, & - H, 'C', 'N') - end if end subroutine get_gauge_overlap_matrix end module w90_get_oper diff --git a/src/utility.F90 b/src/utility.F90 index 3483dd4..325a359 100644 --- a/src/utility.F90 +++ b/src/utility.F90 @@ -30,6 +30,7 @@ module w90_utility public :: utility_lowercase public :: utility_strip public :: utility_zgemm + public :: utility_zgemmm public :: utility_translate_home public :: utility_rotate public :: utility_rotate_old @@ -81,7 +82,64 @@ subroutine utility_zgemm(c,a,transa,b,transb,n) end subroutine utility_zgemm - + !=============================================================! + subroutine utility_zgemmm(a, transa, b, transb, c, transc, & + prod1, eigval, prod2) + !===============================================================! + ! Returns the complex matrix-matrix-matrix product ! + ! --> prod1 = op(a).op(b).op(c), ! + ! where op(a/b/c) are defined according to transa/transb/transc ! + ! (see also documentation of utility_zgemm above) ! + ! ! + ! If eigval and prod2 are present, also ! + ! --> prod2 = op(a).diag(eigval).op(b).op(c) ! + ! is returned. ! + !===============================================================! + use blas95, only : gemm + + complex(kind=dp), dimension(:,:), intent(in) :: a, b, c + character(len=1), intent(in) :: transa, transb, transc + real(kind=dp), dimension(:), optional, & + intent(in) :: eigval + complex(kind=dp), dimension(:,:), optional, & + intent(out) :: prod1, prod2 + + complex(kind=dp), dimension(:,:), allocatable :: tmp + integer :: na, nb, mc, i, j + + ! query matrix sizes + ! naming convention: + ! matrix op(a) [resp. op(b) and op(c)] is of size na x ma [resp. nb x mb and nc x mc] + ! only nb (=ma) and mc are explicitly needed + if(transb /= 'N') then + nb = size(b,2) + else + nb = size(b,1) + end if + if(transc /= 'N') then + mc = size(c,1) + else + mc = size(c,2) + end if + + ! tmp = op(b).op(c) + allocate(tmp(nb,mc)) + call gemm(b, c, tmp, transb, transc) + + ! prod1 = op(a).tmp + if(present(prod1)) then + call gemm(a, tmp, prod1, transa, 'N') + end if + + if(present(prod2) .and. present(eigval)) then + ! tmp = diag(eigval).tmp + forall(i=1:nb, j=1:mc) + tmp(i,j) = eigval(i) * tmp(i,j) + end forall + ! prod2 = op(a).tmp + call gemm(a, tmp, prod2, transa, 'N') + end if + end subroutine !=================================================================== subroutine utility_inv3 (a, b, det) !