Merge pull request merzlab#348 from ohearnk/build-updates-intel-oneapi

Add build support for Intel OneAPI / LLVM compilers
agoetz · Mar 21, 2024 · a15d07c · a15d07c
2 parents ef4d027 + b316ae3
commit a15d07c
Show file tree

Hide file tree

Showing 9 changed files with 104 additions and 21 deletions.
diff --git a/cmake/AmberCompilerConfig.cmake b/cmake/AmberCompilerConfig.cmake
@@ -2,28 +2,32 @@
 
 # Handle the COMPILER option
 # --------------------------------------------------------------------
-set(ALL_COMPILER_VALUES GNU INTEL PGI CRAY CLANG MSVC AUTO MANUAL)
+set(ALL_COMPILER_VALUES GNU INTEL INTELLLVM ONEAPI PGI CRAY CLANG MSVC AUTO MANUAL)
 
 # help message displayed when COMPILER is unset or invalid
 set(COMPILER_HELP "
                        Supported ${PROJECT_NAME} compilers:
                  -----------------------------------------------------------------------
   COMPILER value | C executable | C++ executable | Fortran executable | tested versions
   --------------------------------------------------------------------------------------
-      GNU        |     gcc      |      g++       |     gfortran       | 4.8.5+
-      INTEL      |     icc      |      icpc      |     ifort          | 19
-      PGI        |     pgcc     |      pgc++     |     pgf90          | 
+      GNU        |     gcc      |      g++       |     gfortran       | 6.0 +
+      INTEL      |     icc      |      icpc      |     ifort          | 19 - 22
+      INTELLLVM  |     icx      |      icpx      |     ifx            | 2024
+      ONEAPI     |     icx      |      icpx      |     ifx            | 2024
+      PGI        |     pgcc     |      pgc++     |     pgf90          | 14.9, 15.4, 16.5
       CLANG      |     clang    |      clang++   |     gfortran       | 
-      CRAY       |     cc       |      CC        |     ftn            | 
+      CRAY       |     cc       |      CC        |     ftn            | 8.4.6*
   --------------------------------------------------------------------------------------
       AUTO       |   <uses the default CMake-chosen compilers>
       MANUAL     |   <uses the CC, CXX, and FC environment variables, or the 
                          CMAKE_<LANGUAGE>_COMPILER CMake variables>
+  --------------------------------------------------------------------------------------
+   Note that INTEL is Intel classic, INTELLLVM is Intel oneAPI, and ONEAPI is INTELLLVM.
     ")
 
 
 #create actual option
-set(COMPILER "" CACHE STRING "Compiler to build Amber with.  Valid values: GNU, INTEL, PGI, CRAY, CLANG, MSVC, AUTO, MANUAL.  If 'auto', autodetect the host compiler, or use the CC,CXX,and FC variables if they are set.
+set(COMPILER "" CACHE STRING "Compiler to build Amber with.  Valid values: GNU, INTEL, INTELLLVM, ONEAPI, PGI, CRAY, CLANG, MSVC, AUTO, MANUAL.  If 'auto', autodetect the host compiler, or use the CC,CXX,and FC variables if they are set.
  This option can ONLY be set the first time CMake is run.  If you want to change it, delete the build directory and start over.")
 
 set(COMPILER_VALID FALSE)
@@ -139,6 +143,10 @@ if(FIRST_RUN)
 		set_compiler(C icc)
 		set_compiler(CXX icpc)
 		set_compiler(Fortran ifort)
+	elseif(${COMPILER} STREQUAL INTELLLVM OR ${COMPILER} STREQUAL ONEAPI)
+		set_compiler(C icx)
+		set_compiler(CXX icpx)
+		set_compiler(Fortran ifx)
 	elseif(${COMPILER} STREQUAL PGI)
 		set_compiler(C pgcc)
 		set_compiler(CXX pgc++)

diff --git a/cmake/CompilerFlags.cmake b/cmake/CompilerFlags.cmake
@@ -333,6 +333,79 @@ if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Intel")
 	set(OPT_CXXFLAGS -O3)
 endif()
 
+# IntelLLVM, i.e. the LLVM-based Intel oneAPI compilers (icx, icpx, ifx)
+#---------------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+if("${CMAKE_C_COMPILER_ID}" STREQUAL "IntelLLVM")
+	set(CMAKE_C_FLAGS_DEBUG "-g -debug all")
+	set(OPT_CFLAGS -ip -O3) # NOTE(review): confirm icx accepts -ip without warnings
+
+	# Vectorization flags for the optimized C build are chosen from three inputs:
+	# whether SSE is enabled, the compiler version, and whether SSE_TYPES names
+	# specific instruction sets to cross-compile for.  They are appended only
+	# when SSE is on.  A non-empty SSE_TYPES is passed as -ax<types> INSTEAD OF
+	# -xHost, never in addition to it; per the original author (BPK) this
+	# observed behavior differs from what the Intel man pages report.
+	# OPT_CFLAGS already starts at -O3, so no optimization level is adjusted here.
+
+	if(SSE)
+		# The version is quoted so that an empty CMAKE_C_COMPILER_VERSION cannot
+		# collapse the condition into a malformed if().
+		# NOTE(review): IntelLLVM versions are presumably year-based (e.g. 2024),
+		# which would make the pre-11 fallback unreachable -- confirm before removing.
+		if("${CMAKE_C_COMPILER_VERSION}" VERSION_GREATER 11 OR "${CMAKE_C_COMPILER_VERSION}" VERSION_EQUAL 11)
+			if(NOT "${SSE_TYPES}" STREQUAL "")
+				list(APPEND OPT_CFLAGS "-ax${SSE_TYPES}")
+			else()
+				list(APPEND OPT_CFLAGS -xHost)
+			endif()
+		else()
+			list(APPEND OPT_CFLAGS -axSTPW)
+		endif()
+
+	endif()
+endif()
+
+if("${CMAKE_Fortran_COMPILER_ID}" STREQUAL "IntelLLVM")
+	# Windows gets slash-style switches; every other platform gets dash-style ones.
+	if(WIN32)
+		add_flags(Fortran /D_CRT_SECURE_NO_WARNINGS)
+		set(OPT_FFLAGS "/Ox")
+		set(CMAKE_Fortran_FLAGS_DEBUG "/Zi")
+	else()
+		set(CMAKE_Fortran_FLAGS_DEBUG "-g -debug all")
+		set(OPT_FFLAGS -ip -O3)
+		# With SSE on: -ax<SSE_TYPES> when SSE_TYPES is non-empty, otherwise -xHost.
+		if(SSE)
+			if("${CMAKE_Fortran_COMPILER_VERSION}" VERSION_GREATER 11 OR "${CMAKE_Fortran_COMPILER_VERSION}" VERSION_EQUAL 11)
+				if(NOT "${SSE_TYPES}" STREQUAL "")
+					list(APPEND OPT_FFLAGS "-ax${SSE_TYPES}")
+				else()
+					list(APPEND OPT_FFLAGS -xHost)
+				endif()
+			else()
+				list(APPEND OPT_FFLAGS -axSTPW) # pre-11 fallback; NOTE(review): presumably unreachable for IntelLLVM
+			endif()
+		endif()
+
+		# warning flags
+		add_flags(Fortran "-warn all" "-warn nounused")
+		# Interface checking is opt-in: unless enabled, -warn nointerfaces is added below.
+		option(IFORT_CHECK_INTERFACES "If enabled and Intel Fortran is in use, then ifort will check that types passed to functions are the correct ones, and produce warnings or errors for mismatches." FALSE)
+
+		if(NOT IFORT_CHECK_INTERFACES)
+			# disable errors from type mismatches
+			add_flags(Fortran "-warn nointerfaces")
+		endif()
+
+	endif()
+endif()
+
+if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "IntelLLVM") # C++ flags for the IntelLLVM compiler ID
+	set(OPT_CXXFLAGS "-O3")
+	set(CMAKE_CXX_FLAGS_DEBUG "-g -debug all")
+endif()
+
 # PGI
 #---------------------------------------------------------------------------------------------------------------------------------------------------------------------
 if("${CMAKE_C_COMPILER_ID}" STREQUAL "PGI")

diff --git a/src/calMP2.f90 b/src/calMP2.f90
@@ -485,7 +485,7 @@ subroutine MPI_calmp2
      else
         do i=1,mpisize-1
            ! receive integrals from slave nodes
-           call MPI_RECV(temp4d,nbasis*ivir*iocc*nsteplength,mpi_double_precision,i,i,MPI_COMM_WORLD,MPI_STATUS,IERROR)
+           call MPI_RECV(temp4d,nbasis*ivir*iocc*nsteplength,mpi_double_precision,i,i,MPI_COMM_WORLD,QUICK_MPI_STATUS,IERROR)
            ! and sum them into operator
            do j1=1,nbasis
               do k1=1,ivir

diff --git a/src/dnc/dnc_hfoperator.f90 b/src/dnc/dnc_hfoperator.f90
@@ -296,7 +296,7 @@ end subroutine hfoperatordc
 subroutine mpi_hfoperatordc(oneElecO)
    use allmod
    use quick_gaussian_class_module
-    use quick_cutoff_module, only: cshell_density_cutoff
+   use quick_cutoff_module, only: cshell_density_cutoff
    use quick_cshell_eri_module, only: getCshellEriEnergy
    use mpi
    implicit double precision(a-h,o-z)
@@ -427,7 +427,7 @@ subroutine mpi_hfoperatordc(oneElecO)
    else
       do i=1,mpisize-1
          ! receive opertors from slave nodes
-         call MPI_RECV(temp2d,nbasis*nbasis,mpi_double_precision,i,i,MPI_COMM_WORLD,MPI_STATUS,IERROR)
+         call MPI_RECV(temp2d,nbasis*nbasis,mpi_double_precision,i,i,MPI_COMM_WORLD,QUICK_MPI_STATUS,IERROR)
          ! and sum them into operator
          do ii=1,nbasis
             do jj=1,nbasis

diff --git a/src/dnc/dnc_scf.f90 b/src/dnc/dnc_scf.f90
@@ -313,11 +313,11 @@ subroutine electdiisdc(jscf,PRMS)
                do i=1,mpi_dc_fragn(ittt)
                   itt=mpi_dc_frag(ittt,i)
                   call MPI_RECV(codcsub(1:NNmax,1:NNmax,itt),NNmax*NNmax, &
-                        mpi_double_precision,ittt,itt,MPI_COMM_WORLD,MPI_STATUS,IERROR)
+                        mpi_double_precision,ittt,itt,MPI_COMM_WORLD,QUICK_MPI_STATUS,IERROR)
                   call MPI_RECV(codcsubtran(1:NNmax,1:NNmax,itt),NNmax*NNmax, &
-                        mpi_double_precision,ittt,itt,MPI_COMM_WORLD,MPI_STATUS,IERROR)
+                        mpi_double_precision,ittt,itt,MPI_COMM_WORLD,QUICK_MPI_STATUS,IERROR)
                   call MPI_RECV(evaldcsub(itt,1:NNmax),NNmax,mpi_double_precision, &
-                        ittt,itt,MPI_COMM_WORLD,MPI_STATUS,IERROR)
+                        ittt,itt,MPI_COMM_WORLD,QUICK_MPI_STATUS,IERROR)
                enddo
             enddo
          endif

diff --git a/src/modules/quick_mpi_module.f90 b/src/modules/quick_mpi_module.f90
@@ -34,7 +34,7 @@ module quick_mpi_module
     logical :: master = .true.      ! flag to show if the node is master node
     logical :: bMPI = .true.        ! flag to show if MPI is turn on
     logical :: libMPIMode = .false. ! if mpi is initialized somewhere other than quick
-    integer, allocatable :: MPI_STATUS(:)
+    integer, allocatable :: QUICK_MPI_STATUS(:)
     integer, parameter :: MIN_1E_MPI_BASIS=6
     integer, allocatable :: mgpu_ids(:)    
     integer :: mgpu_id

diff --git a/src/modules/quick_timer_module.f90 b/src/modules/quick_timer_module.f90
@@ -373,7 +373,7 @@ subroutine timer_output(io)
 !                MPI_timer_cumer=timer_cumer
 !                max_timer_cumer=timer_cumer
 !                do i=1,mpisize-1
-!                    call MPI_RECV(tmp_timer_cumer,1,mpi_timer_cumer_type,i,i,MPI_COMM_WORLD,MPI_STATUS,IERROR)
+!                    call MPI_RECV(tmp_timer_cumer,1,mpi_timer_cumer_type,i,i,MPI_COMM_WORLD,QUICK_MPI_STATUS,IERROR)
 !                    MPI_timer_cumer%TTotal=MPI_timer_cumer%TTotal+tmp_timer_cumer%T2e+tmp_timer_cumer%TMP2+ &
 !                        tmp_timer_cumer%T1e+ tmp_timer_cumer%TDiag+tmp_timer_cumer%TGrad
 !                    MPI_timer_cumer%TTotal=MPI_timer_cumer%TTotal+tmp_timer_cumer%TDiag

diff --git a/src/mpi_setup.f90 b/src/mpi_setup.f90
@@ -23,7 +23,7 @@ subroutine initialize_quick_mpi()
       call MPI_GET_PROCESSOR_NAME(pname,namelen,mpierror)
       call MPI_BARRIER(MPI_COMM_WORLD,mpierror)
 
-      if(.not. allocated(MPI_STATUS)) allocate(MPI_STATUS(MPI_STATUS_SIZE))
+      if(.not. allocated(QUICK_MPI_STATUS)) allocate(QUICK_MPI_STATUS(MPI_STATUS_SIZE))
 
       if (mpirank.eq.0) then
         master=.true.
@@ -375,7 +375,7 @@ subroutine mgpu_setup()
         mgpu_ids(1)=mgpu_id
 
         do i=1,mpisize-1
-          call MPI_RECV(mgpu_ids(i+1),1,mpi_integer,i,i,MPI_COMM_WORLD,MPI_STATUS,IERROR)
+          call MPI_RECV(mgpu_ids(i+1),1,mpi_integer,i,i,MPI_COMM_WORLD,QUICK_MPI_STATUS,IERROR)
         enddo
 
       endif
@@ -588,8 +588,10 @@ subroutine get_mpi_ssw
    else
 
       do i=1,mpisize-1
-         call MPI_RECV(quick_xcg_tmp%tmp_sswt,quick_dft_grid%init_ngpts,mpi_double_precision,i,i,MPI_COMM_WORLD,MPI_STATUS,IERROR)
-         call MPI_RECV(quick_xcg_tmp%tmp_weight,quick_dft_grid%init_ngpts,mpi_double_precision,i,i,MPI_COMM_WORLD,MPI_STATUS,IERROR)
+         call MPI_RECV(quick_xcg_tmp%tmp_sswt,quick_dft_grid%init_ngpts,mpi_double_precision,i,i,MPI_COMM_WORLD,&
+                 QUICK_MPI_STATUS,IERROR)
+         call MPI_RECV(quick_xcg_tmp%tmp_weight,quick_dft_grid%init_ngpts,mpi_double_precision,i,i,MPI_COMM_WORLD,&
+                 QUICK_MPI_STATUS,IERROR)
 
          do j=1,quick_dft_grid%init_ngpts
             quick_xcg_tmp%sswt(j)=quick_xcg_tmp%sswt(j)+quick_xcg_tmp%tmp_sswt(j)

diff --git a/src/obsolete/electdii.f90 b/src/obsolete/electdii.f90
@@ -832,11 +832,11 @@ subroutine electdiisdc(jscf,PRMS)
                   do i=1,mpi_dc_fragn(ittt)
                      itt=mpi_dc_frag(ittt,i)
                      call MPI_RECV(codcsub(1:NNmax,1:NNmax,itt),NNmax*NNmax, &
-                           mpi_double_precision,ittt,itt,MPI_COMM_WORLD,MPI_STATUS,IERROR)
+                           mpi_double_precision,ittt,itt,MPI_COMM_WORLD,QUICK_MPI_STATUS,IERROR)
                      call MPI_RECV(codcsubtran(1:NNmax,1:NNmax,itt),NNmax*NNmax, &
-                           mpi_double_precision,ittt,itt,MPI_COMM_WORLD,MPI_STATUS,IERROR)
+                           mpi_double_precision,ittt,itt,MPI_COMM_WORLD,QUICK_MPI_STATUS,IERROR)
                      call MPI_RECV(evaldcsub(itt,1:NNmax),NNmax,mpi_double_precision, &
-                           ittt,itt,MPI_COMM_WORLD,MPI_STATUS,IERROR)
+                           ittt,itt,MPI_COMM_WORLD,QUICK_MPI_STATUS,IERROR)
                   enddo
                enddo
             endif