From f6d3b134fb96ce488318cd630ac19977e2d1792e Mon Sep 17 00:00:00 2001 From: "Kurt A. O'Hearn" <1138669+ohearnk@users.noreply.github.com> Date: Sun, 17 Mar 2024 21:39:07 -0400 Subject: [PATCH 1/2] Add support for builds with Intel OneAPI / LLVM compilers. MPI updates to resolve issue with Intel MPI libraries. --- cmake/AmberCompilerConfig.cmake | 20 +++++--- cmake/CompilerFlags.cmake | 73 ++++++++++++++++++++++++++++++ src/calMP2.f90 | 2 +- src/dnc/dnc_hfoperator.f90 | 2 +- src/dnc/dnc_scf.f90 | 6 +-- src/modules/quick_mpi_module.f90 | 2 +- src/modules/quick_timer_module.f90 | 2 +- src/mpi_setup.f90 | 8 ++-- src/obsolete/electdii.f90 | 6 +-- 9 files changed, 101 insertions(+), 20 deletions(-)
Review notes (placed after the diffstat, so they are not part of the commit message):
 * COMPILER=INTELLLVM and COMPILER=ONEAPI both select icx / icpx / ifx.
 * The new CompilerFlags.cmake blocks are keyed on the C, CXX and Fortran compiler IDs being "IntelLLVM".
 * The two SSE compiler-version tests are written as NOT "version" VERSION_LESS 11: the same
   "11 or newer" condition, with the version expansion quoted in both the C and the Fortran block.
 * NOTE(review): the -axSTPW fallback for versions below 11 is presumably unreachable when the
   compiler ID is IntelLLVM -- confirm before removing it.
 * The allocatable MPI_STATUS in quick_mpi_module is renamed to QUICK_MPI_STATUS and every MPI_RECV
   call touched by this patch passes the new name; presumably this avoids a clash with a name
   supplied by the MPI library -- confirm.
diff --git a/cmake/AmberCompilerConfig.cmake b/cmake/AmberCompilerConfig.cmake index 79a7d51a3..3c73c1e3c 100644 --- a/cmake/AmberCompilerConfig.cmake +++ b/cmake/AmberCompilerConfig.cmake @@ -2,7 +2,7 @@ # Handle the COMPILER option # -------------------------------------------------------------------- -set(ALL_COMPILER_VALUES GNU INTEL PGI CRAY CLANG MSVC AUTO MANUAL) +set(ALL_COMPILER_VALUES GNU INTEL INTELLLVM ONEAPI PGI CRAY CLANG MSVC AUTO MANUAL) # help message displayed when COMPILER is unset or invalid set(COMPILER_HELP " @@ -10,20 +10,24 @@ set(COMPILER_HELP " ----------------------------------------------------------------------- COMPILER value | C executable | C++ executable | Fortran executable | tested versions -------------------------------------------------------------------------------------- - GNU | gcc | g++ | gfortran | 4.8.5+ - INTEL | icc | icpc | ifort | 19 - PGI | pgcc | pgc++ | pgf90 | + GNU | gcc | g++ | gfortran | 6.0 + + INTEL | icc | icpc | ifort | 19 - 22 + INTELLLVM | icx | icpx | ifx | 2024 + ONEAPI | icx | icpx | ifx | 2024 + PGI | pgcc | pgc++ | pgf90 | 14.9, 15.4, 16.5 CLANG | clang | clang++ | gfortran | - CRAY | cc | CC | ftn | + CRAY | cc | CC | ftn | 8.4.6* 
-------------------------------------------------------------------------------------- AUTO | MANUAL | _COMPILER CMake variables> + -------------------------------------------------------------------------------------- + Note that INTEL is Intel classic, INTELLLVM is Intel oneAPI, and ONEAPI is INTELLLVM. ") #create actual option -set(COMPILER "" CACHE STRING "Compiler to build Amber with. Valid values: GNU, INTEL, PGI, CRAY, CLANG, MSVC, AUTO, MANUAL. If 'auto', autodetect the host compiler, or use the CC,CXX,and FC variables if they are set. +set(COMPILER "" CACHE STRING "Compiler to build Amber with. Valid values: GNU, INTEL, INTELLLVM, ONEAPI, PGI, CRAY, CLANG, MSVC, AUTO, MANUAL. If 'auto', autodetect the host compiler, or use the CC,CXX,and FC variables if they are set. This option can ONLY be set the first time CMake is run. If you want to change it, delete the build directory and start over.") set(COMPILER_VALID FALSE) @@ -139,6 +143,10 @@ if(FIRST_RUN) set_compiler(C icc) set_compiler(CXX icpc) set_compiler(Fortran ifort) + elseif(${COMPILER} STREQUAL INTELLLVM OR ${COMPILER} STREQUAL ONEAPI) + set_compiler(C icx) + set_compiler(CXX icpx) + set_compiler(Fortran ifx) elseif(${COMPILER} STREQUAL PGI) set_compiler(C pgcc) set_compiler(CXX pgc++) diff --git a/cmake/CompilerFlags.cmake b/cmake/CompilerFlags.cmake index 41a3002cc..0d2c9f0de 100644 --- a/cmake/CompilerFlags.cmake +++ b/cmake/CompilerFlags.cmake @@ -333,6 +333,79 @@ if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Intel") set(OPT_CXXFLAGS -O3) endif() +#IntelLLVM aka Intel oneAPI +#--------------------------------------------------------------------------------------------------------------------------------------------------------------------- + +if("${CMAKE_C_COMPILER_ID}" STREQUAL "IntelLLVM") + set(CMAKE_C_FLAGS_DEBUG "-g -debug all") + set(OPT_CFLAGS -ip -O3) + + # How flags get set for optimization depend on whether we have a MIC processor, + # the version of Intel compiler we have, and whether we 
are cross-compiling + # for multiple versions of SSE support. The following coordinates all of this. + # This was done assuming that MIC and SSE are mutually exclusive and that we want + # SSE instructions included only when optimize = yes. Note that use of an + # SSE_TYPES specification needs to be given in place of xHost not in addition to. + # This observed behavior is not what is reported by the Intel man pages. BPK + + if(SSE) + # BPK removed section that modified O1 or O2 to be O3 if optimize was set to yes. + # We already begin with the O3 setting so it wasn't needed. + # For both coptflags and foptflags, use the appropriate settings + # for the sse flags (compiler version dependent). + if(NOT "${CMAKE_C_COMPILER_VERSION}" VERSION_LESS 11) + if(NOT "${SSE_TYPES}" STREQUAL "") + list(APPEND OPT_CFLAGS "-ax${SSE_TYPES}") + else() + list(APPEND OPT_CFLAGS -xHost) + endif() + else() + list(APPEND OPT_CFLAGS -axSTPW) + endif() + + endif() +endif() + +if("${CMAKE_Fortran_COMPILER_ID}" STREQUAL "IntelLLVM") + + if(WIN32) + add_flags(Fortran /D_CRT_SECURE_NO_WARNINGS) + set(OPT_FFLAGS "/Ox") + set(CMAKE_Fortran_FLAGS_DEBUG "/Zi") + else() + set(CMAKE_Fortran_FLAGS_DEBUG "-g -debug all") + set(OPT_FFLAGS -ip -O3) + + if(SSE) + if(NOT "${CMAKE_Fortran_COMPILER_VERSION}" VERSION_LESS 11) + if(NOT "${SSE_TYPES}" STREQUAL "") + list(APPEND OPT_FFLAGS "-ax${SSE_TYPES}") + else() + list(APPEND OPT_FFLAGS -xHost) + endif() + else() + list(APPEND OPT_FFLAGS -axSTPW) + endif() + endif() + + # warning flags + add_flags(Fortran "-warn all" "-warn nounused") + + option(IFORT_CHECK_INTERFACES "If enabled and Intel Fortran is in use, then ifort will check that types passed to functions are the correct ones, and produce warnings or errors for mismatches." 
FALSE) + + if(NOT IFORT_CHECK_INTERFACES) + # disable errors from type mismatches + add_flags(Fortran -warn nointerfaces) + endif() + + endif() +endif() + +if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "IntelLLVM") + set(CMAKE_CXX_FLAGS_DEBUG "-g -debug all") + set(OPT_CXXFLAGS -O3) +endif() + # PGI #--------------------------------------------------------------------------------------------------------------------------------------------------------------------- if("${CMAKE_C_COMPILER_ID}" STREQUAL "PGI") diff --git a/src/calMP2.f90 b/src/calMP2.f90 index 461b02c45..1f2d5f46d 100644 --- a/src/calMP2.f90 +++ b/src/calMP2.f90 @@ -485,7 +485,7 @@ subroutine MPI_calmp2 else do i=1,mpisize-1 ! receive integrals from slave nodes - call MPI_RECV(temp4d,nbasis*ivir*iocc*nsteplength,mpi_double_precision,i,i,MPI_COMM_WORLD,MPI_STATUS,IERROR) + call MPI_RECV(temp4d,nbasis*ivir*iocc*nsteplength,mpi_double_precision,i,i,MPI_COMM_WORLD,QUICK_MPI_STATUS,IERROR) ! and sum them into operator do j1=1,nbasis do k1=1,ivir diff --git a/src/dnc/dnc_hfoperator.f90 b/src/dnc/dnc_hfoperator.f90 index f844075fa..fd5e0093f 100644 --- a/src/dnc/dnc_hfoperator.f90 +++ b/src/dnc/dnc_hfoperator.f90 @@ -427,7 +427,7 @@ subroutine mpi_hfoperatordc(oneElecO) else do i=1,mpisize-1 ! receive opertors from slave nodes - call MPI_RECV(temp2d,nbasis*nbasis,mpi_double_precision,i,i,MPI_COMM_WORLD,MPI_STATUS,IERROR) + call MPI_RECV(temp2d,nbasis*nbasis,mpi_double_precision,i,i,MPI_COMM_WORLD,QUICK_MPI_STATUS,IERROR) ! 
and sum them into operator do ii=1,nbasis do jj=1,nbasis diff --git a/src/dnc/dnc_scf.f90 b/src/dnc/dnc_scf.f90 index c73131ed8..febb83f42 100644 --- a/src/dnc/dnc_scf.f90 +++ b/src/dnc/dnc_scf.f90 @@ -313,11 +313,11 @@ subroutine electdiisdc(jscf,PRMS) do i=1,mpi_dc_fragn(ittt) itt=mpi_dc_frag(ittt,i) call MPI_RECV(codcsub(1:NNmax,1:NNmax,itt),NNmax*NNmax, & - mpi_double_precision,ittt,itt,MPI_COMM_WORLD,MPI_STATUS,IERROR) + mpi_double_precision,ittt,itt,MPI_COMM_WORLD,QUICK_MPI_STATUS,IERROR) call MPI_RECV(codcsubtran(1:NNmax,1:NNmax,itt),NNmax*NNmax, & - mpi_double_precision,ittt,itt,MPI_COMM_WORLD,MPI_STATUS,IERROR) + mpi_double_precision,ittt,itt,MPI_COMM_WORLD,QUICK_MPI_STATUS,IERROR) call MPI_RECV(evaldcsub(itt,1:NNmax),NNmax,mpi_double_precision, & - ittt,itt,MPI_COMM_WORLD,MPI_STATUS,IERROR) + ittt,itt,MPI_COMM_WORLD,QUICK_MPI_STATUS,IERROR) enddo enddo endif diff --git a/src/modules/quick_mpi_module.f90 b/src/modules/quick_mpi_module.f90 index 03b7e1131..06d7f4234 100644 --- a/src/modules/quick_mpi_module.f90 +++ b/src/modules/quick_mpi_module.f90 @@ -34,7 +34,7 @@ module quick_mpi_module logical :: master = .true. ! flag to show if the node is master node logical :: bMPI = .true. ! flag to show if MPI is turn on logical :: libMPIMode = .false. ! if mpi is initialized somewhere other than quick - integer, allocatable :: MPI_STATUS(:) + integer, allocatable :: QUICK_MPI_STATUS(:) integer, parameter :: MIN_1E_MPI_BASIS=6 integer, allocatable :: mgpu_ids(:) integer :: mgpu_id diff --git a/src/modules/quick_timer_module.f90 b/src/modules/quick_timer_module.f90 index ab8172729..57ea6452b 100644 --- a/src/modules/quick_timer_module.f90 +++ b/src/modules/quick_timer_module.f90 @@ -373,7 +373,7 @@ subroutine timer_output(io) ! MPI_timer_cumer=timer_cumer ! max_timer_cumer=timer_cumer ! do i=1,mpisize-1 -! call MPI_RECV(tmp_timer_cumer,1,mpi_timer_cumer_type,i,i,MPI_COMM_WORLD,MPI_STATUS,IERROR) +! 
call MPI_RECV(tmp_timer_cumer,1,mpi_timer_cumer_type,i,i,MPI_COMM_WORLD,QUICK_MPI_STATUS,IERROR) ! MPI_timer_cumer%TTotal=MPI_timer_cumer%TTotal+tmp_timer_cumer%T2e+tmp_timer_cumer%TMP2+ & ! tmp_timer_cumer%T1e+ tmp_timer_cumer%TDiag+tmp_timer_cumer%TGrad ! MPI_timer_cumer%TTotal=MPI_timer_cumer%TTotal+tmp_timer_cumer%TDiag diff --git a/src/mpi_setup.f90 b/src/mpi_setup.f90 index cbb0f21a6..307981bff 100644 --- a/src/mpi_setup.f90 +++ b/src/mpi_setup.f90 @@ -23,7 +23,7 @@ subroutine initialize_quick_mpi() call MPI_GET_PROCESSOR_NAME(pname,namelen,mpierror) call MPI_BARRIER(MPI_COMM_WORLD,mpierror) - if(.not. allocated(MPI_STATUS)) allocate(MPI_STATUS(MPI_STATUS_SIZE)) + if(.not. allocated(QUICK_MPI_STATUS)) allocate(QUICK_MPI_STATUS(MPI_STATUS_SIZE)) if (mpirank.eq.0) then master=.true. @@ -375,7 +375,7 @@ subroutine mgpu_setup() mgpu_ids(1)=mgpu_id do i=1,mpisize-1 - call MPI_RECV(mgpu_ids(i+1),1,mpi_integer,i,i,MPI_COMM_WORLD,MPI_STATUS,IERROR) + call MPI_RECV(mgpu_ids(i+1),1,mpi_integer,i,i,MPI_COMM_WORLD,QUICK_MPI_STATUS,IERROR) enddo endif @@ -588,8 +588,8 @@ subroutine get_mpi_ssw else do i=1,mpisize-1 - call MPI_RECV(quick_xcg_tmp%tmp_sswt,quick_dft_grid%init_ngpts,mpi_double_precision,i,i,MPI_COMM_WORLD,MPI_STATUS,IERROR) - call MPI_RECV(quick_xcg_tmp%tmp_weight,quick_dft_grid%init_ngpts,mpi_double_precision,i,i,MPI_COMM_WORLD,MPI_STATUS,IERROR) + call MPI_RECV(quick_xcg_tmp%tmp_sswt,quick_dft_grid%init_ngpts,mpi_double_precision,i,i,MPI_COMM_WORLD,QUICK_MPI_STATUS,IERROR) + call MPI_RECV(quick_xcg_tmp%tmp_weight,quick_dft_grid%init_ngpts,mpi_double_precision,i,i,MPI_COMM_WORLD,QUICK_MPI_STATUS,IERROR) do j=1,quick_dft_grid%init_ngpts quick_xcg_tmp%sswt(j)=quick_xcg_tmp%sswt(j)+quick_xcg_tmp%tmp_sswt(j) diff --git a/src/obsolete/electdii.f90 b/src/obsolete/electdii.f90 index de0d810b5..f6df4a72b 100644 --- a/src/obsolete/electdii.f90 +++ b/src/obsolete/electdii.f90 @@ -832,11 +832,11 @@ subroutine electdiisdc(jscf,PRMS) do i=1,mpi_dc_fragn(ittt) 
itt=mpi_dc_frag(ittt,i) call MPI_RECV(codcsub(1:NNmax,1:NNmax,itt),NNmax*NNmax, & - mpi_double_precision,ittt,itt,MPI_COMM_WORLD,MPI_STATUS,IERROR) + mpi_double_precision,ittt,itt,MPI_COMM_WORLD,QUICK_MPI_STATUS,IERROR) call MPI_RECV(codcsubtran(1:NNmax,1:NNmax,itt),NNmax*NNmax, & - mpi_double_precision,ittt,itt,MPI_COMM_WORLD,MPI_STATUS,IERROR) + mpi_double_precision,ittt,itt,MPI_COMM_WORLD,QUICK_MPI_STATUS,IERROR) call MPI_RECV(evaldcsub(itt,1:NNmax),NNmax,mpi_double_precision, & - ittt,itt,MPI_COMM_WORLD,MPI_STATUS,IERROR) + ittt,itt,MPI_COMM_WORLD,QUICK_MPI_STATUS,IERROR) enddo enddo endif From b316ae378a081fcf3550c5561e28cd885d4b8c91 Mon Sep 17 00:00:00 2001 From: "Kurt A. O'Hearn" <1138669+ohearnk@users.noreply.github.com> Date: Wed, 20 Mar 2024 22:15:25 -0400 Subject: [PATCH 2/2] Fix syntax errors (line continuations). --- src/dnc/dnc_hfoperator.f90 | 2 +- src/mpi_setup.f90 | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/dnc/dnc_hfoperator.f90 b/src/dnc/dnc_hfoperator.f90 index fd5e0093f..5e9614a01 100644 --- a/src/dnc/dnc_hfoperator.f90 +++ b/src/dnc/dnc_hfoperator.f90 @@ -296,7 +296,7 @@ end subroutine hfoperatordc subroutine mpi_hfoperatordc(oneElecO) use allmod use quick_gaussian_class_module - use quick_cutoff_module, only: cshell_density_cutoff + use quick_cutoff_module, only: cshell_density_cutoff use quick_cshell_eri_module, only: getCshellEriEnergy use mpi implicit double precision(a-h,o-z) diff --git a/src/mpi_setup.f90 b/src/mpi_setup.f90 index 307981bff..8dbbad678 100644 --- a/src/mpi_setup.f90 +++ b/src/mpi_setup.f90 @@ -588,8 +588,10 @@ subroutine get_mpi_ssw else do i=1,mpisize-1 - call MPI_RECV(quick_xcg_tmp%tmp_sswt,quick_dft_grid%init_ngpts,mpi_double_precision,i,i,MPI_COMM_WORLD,QUICK_MPI_STATUS,IERROR) - call MPI_RECV(quick_xcg_tmp%tmp_weight,quick_dft_grid%init_ngpts,mpi_double_precision,i,i,MPI_COMM_WORLD,QUICK_MPI_STATUS,IERROR) + call 
MPI_RECV(quick_xcg_tmp%tmp_sswt,quick_dft_grid%init_ngpts,mpi_double_precision,i,i,MPI_COMM_WORLD,& + QUICK_MPI_STATUS,IERROR) + call MPI_RECV(quick_xcg_tmp%tmp_weight,quick_dft_grid%init_ngpts,mpi_double_precision,i,i,MPI_COMM_WORLD,& + QUICK_MPI_STATUS,IERROR) do j=1,quick_dft_grid%init_ngpts quick_xcg_tmp%sswt(j)=quick_xcg_tmp%sswt(j)+quick_xcg_tmp%tmp_sswt(j)