From 97408a56231f9cdef2558d0a50e70c0c78619961 Mon Sep 17 00:00:00 2001
From: Frederic Perez <frederic.perez@polytechnique.edu>
Date: Mon, 22 Jan 2024 15:28:47 +0100
Subject: [PATCH 1/6] makefile changes for GPU

---
 doc/Sphinx/Use/installation.rst               | 173 +++++-----
 makefile                                      | 298 ++++++++----------
 scripts/compile_tools/machine/adastra         |  42 +--
 scripts/compile_tools/machine/jean_zay_gpu    |  30 +-
 .../compile_tools/machine/jean_zay_gpu_A100   |  30 +-
 .../compile_tools/machine/jean_zay_gpu_nvcc   |  30 +-
 scripts/compile_tools/machine/juanjo_cpu      |   4 +-
 scripts/compile_tools/machine/llracp          |   4 +-
 scripts/compile_tools/machine/ruche_gpu       |   4 +-
 scripts/compile_tools/machine/ruche_gpu2      |  27 +-
 10 files changed, 297 insertions(+), 345 deletions(-)
diff --git a/doc/Sphinx/Use/installation.rst b/doc/Sphinx/Use/installation.rst
index a2f872e36..4027459d8 100755
--- a/doc/Sphinx/Use/installation.rst
+++ b/doc/Sphinx/Use/installation.rst
@@ -1,39 +1,61 @@
 Install
 -------
 
-Before installing :program:`Smilei`, you need to install a few dependencies:
+Installing Smilei requires several steps:
 
-* A C++11 compiler, optionally implementing openMP version > 4.5
-  (gcc users: v6.0 or newer recommended)
-* an MPI library (by default a version supporting ``MPI_THREAD_MULTIPLE``
-  is required: v4.0 or newer recommended)
-* an HDF5 library compatible with your versions of C++ and MPI
-* Python 2.7 or Python 3+ (with header files)
-
-Optional dependencies are:
-
-* Git
-* Python modules: sphinx, h5py, numpy, matplotlib, pint
-* ffmpeg
-* CUDA for NVIDIA GPUs or HIP-SYCL for AMD GPUs (it is recommended to use the already installed software stack and the support team of a supercomputer you have access to). 
+#. Install compilers and libraries that Smilei needs (*dependencies*)
+#. Download Smilei
+#. Setup your environment (*environment variables*)
+#. Compile
 
 ----
 
 Install the dependencies
 ^^^^^^^^^^^^^^^^^^^^^^^^
 
+The **necessary** dependencies are:
+
+* A C++11 compiler, optionally implementing openMP version > 4.5.
+* An MPI library (by default a version supporting ``MPI_THREAD_MULTIPLE``).
+  IntelMPI or OpenMPI are recommended.
+* The **parallel** HDF5 library compiled with your versions of C++ and MPI.
+* Python 3+ with header files.
+
+When compiling on GPU:
+
+* The C++ compiler must be GPU-aware (typically ``nvc++`` for NVIDIA or ``clang`` for AMD)
+* A CUDA or HIP compiler is necessary (typically ``nvcc`` for NVIDIA or ``hipcc`` for AMD)
+
+Optional dependencies are:
+
+* `Git <https://git-scm.com/>`_ for version control
+* Python modules for post-processing: sphinx, h5py, numpy, matplotlib, pint
+* `FFmpeg <https://ffmpeg.org/>`_ for converting animations to videos
+
 There are various ways to install all dependencies, depending on the platform:
 
 * :doc:`On MacOs<install_macos>`
 * :doc:`On Linux<install_linux>`
 * :doc:`On a supercomputer<install_supercomputer>`
 
-The command ``make help`` can give you some information about your environment.
-
 If you have successfully installed these dependencies on other platforms,
 please :doc:`contact us </Overview/partners>` and share!
 
 
+----
+
+Download the Smilei source
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Clone the latest :program:`Smilei` version from Github:
+
+.. code-block:: bash
+
+  cd /path/of/your/choice/
+  git clone https://github.com/SmileiPIC/Smilei.git
+
+If you prefer a direct download, see :ref:`here <latestVersion>`.
+
 ----
 
 Setup environment variables for compilation
@@ -43,44 +65,34 @@ Several environment variables may be required, depending on your setup.
 
 * ``SMILEICXX``: the MPI-C++ compiler.
   Defaults to ``mpicxx``.
-* ``HDF5_ROOT_DIR``: the folder for the HDF5 library.
+* ``HDF5_ROOT_DIR``: the folder of the HDF5 library.
   Defaults to ``$HDF5_ROOT``.
 * ``BUILD_DIR``: the folder where the compilation should occur.
   Defaults to ``./build``.
 * ``PYTHONEXE``: the python executable to use in smilei.
   Defaults to ``python``.
+* ``CXXFLAGS``: flags for the C++ compiler.
+* ``LDFLAGS``: flags for the linker.
+* ``GPU_COMPILER``: the compiler for CUDA or HIP (typically ``nvcc`` or ``hipcc``).
+  Defaults to ``$CC``.
+* ``GPU_COMPILER_FLAGS``: flags for ``$GPU_COMPILER``.
 
-The usual ``CXXFLAGS`` and ``LDFLAGS`` can also be used to pass other
-arguments to the compiler and linker.
-
+The command ``make help`` can give you some information about your environment.
 
 ----
 
 .. _compile:
 
-Download and compile
+Compile Smilei
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
-#. Clone the latest :program:`Smilei` version from Github:
+In a terminal, go to the folder where you downloaded :program:`Smilei` and use the command:
 
-   .. code-block:: bash
-    
-     cd /path/of/your/choice/
-     git clone https://github.com/SmileiPIC/Smilei.git
-    
-   If you do not have ``git``, you can dowload a tarball :ref:`here <latestVersion>`
-   and extract it in a new folder.
-
-#. In a terminal, go to that location and compile:
-
-   .. code-block:: bash
+.. code-block:: bash
 
-     cd Smilei
-     make
-   
-   If the compilation is successful, you should now have a new ``smilei`` executable.
+  make
 
-#. The next step is to :doc:`write a namelist <namelist>`.
+If the compilation is successful, you should now have a new ``smilei`` executable.
 
 ----
 
@@ -91,22 +103,22 @@ Advanced compilation options
 
 .. code-block:: bash
 
-  make -j 4
+  make -j 4  # Compiles on 4 threads
 
 .. rubric:: Compilation configuration with keyword "config"
 
 .. code-block:: bash
 
-  make config=debug                        # With debugging output (slow execution)
-  make config=noopenmp                     # Without OpenMP support
-  make config=no_mpi_tm                    # Without a MPI library which supports MPI_THREAD_MULTIPLE
-  make config=scalasca                     # For the Scalasca profiler
-  make config=advisor                      # For Intel Advisor
-  make config=vtune                        # For Intel Vtune
-  make config=inspector                    # For Intel Inspector
-  make config=detailed_timers              # More detailed timers, but somewhat slower execution
-  make config="gpu_nvidia noopenmp"        # For Nvidia GPU acceleration
-  make config="gpu_amd"                    # For AMD GPU acceleration
+  make config=noopenmp        # Without OpenMP support
+  make config=no_mpi_tm       # Without a MPI library which supports MPI_THREAD_MULTIPLE
+  make config=gpu_nvidia      # For Nvidia GPU acceleration
+  make config=gpu_amd         # For AMD GPU acceleration
+  make config=debug           # With debugging output (slow execution)
+  make config=scalasca        # For the Scalasca profiler
+  make config=advisor         # For Intel Advisor
+  make config=vtune           # For Intel Vtune
+  make config=inspector       # For Intel Inspector
+  make config=detailed_timers # More detailed timers, but somewhat slower execution
 
 It is possible to combine arguments above within quotes, for instance:
 
@@ -114,8 +126,6 @@ It is possible to combine arguments above within quotes, for instance:
 
   make config="debug noopenmp" # With debugging output, without OpenMP
 
-However, some arguments may not be compatible, e.g. ``noopenmp`` and ``omptasks``. 
-
 .. rubric:: Obtain some information about the compilation
 
 .. code-block:: bash
@@ -140,46 +150,45 @@ executed before compilation. If you successfully write such a file for
 a common supercomputer, please share it with developpers so that it can
 be included in the next release of :program:`Smilei`.
 
+----
 
-.. rubric:: Compilation for GPU accelerated nodes:
-
-As each supercomputer has a different environnment to compile for GPUs and since the nvhpc + CUDA/ cray + HIP modules evolve quickly, a machine file is required for the compilation.
-Several machine files are already available as an example in smilei/scripts/compile_tools/machine/ ; such as: jean_zay_gpu_V100, jean_zay_gpu_A100, adastra, ruche_gpu2.
-
-Typically we need it to specify ACCELERATOR_GPU_FLAGS += -ta=tesla:cc80 for nvhpc <23.4 and ACCELERATOR_GPU_FLAGS += -gpu=cc80 -acc for the more recent versions of nvhpc.
-
-.. code-block:: bash
-
-	make -j 12 machine="jean_zay_gpu_A100" config="gpu_nvidia noopenmp verbose" # for Nvidia GPU
-	make -j 12 machine="adastra" config="gpu_amd" 			            # for AMD GPU
-
+Compilation for GPU accelerated nodes
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
-Furthermore, here are 2 examples of known working ennvironments, first for AMD GPUs, second for Nvidia GPUs:
+On GPU, two compilers are used: a C++ compiler for the main code
+(defined by the variable ``$SMILEICXX``) and a compiler for 
+``.cu`` CUDA files (defined by the variable ``$GPU_COMPILER``).
+For NVIDIA, it is recommended to use the ``nvhpc`` software kit
+which includes the compilers ``nvc++`` and ``nvcc``.
+For AMD, the equivalent ``ROCm`` software kit includes ``clang`` and ``hipcc``.
 
-.. code-block:: bash
+Generally, several flags must be supplied to these compilers in order
+to properly target your system architecture. They must
+be supplied in ``$CXXFLAGS`` and ``$GPU_COMPILER_FLAGS``.
+Please refer to the system administrators to find available compilers
+and the required flags for your machine, as well as the commands
+needed to load the correct environment.
 
-	module purge
-	module load craype-accel-amd-gfx90a craype-x86-trento
-	module load PrgEnv-cray/8.3.3
-	module load cpe/23.02
-	module load cray-mpich/8.1.24 cray-hdf5-parallel/1.12.2.1 cray-python/3.9.13.1
-	module load amd-mixed/5.2.3
+The compilation of Smilei must include a special ``config`` keyword equal to either
+``gpu_nvidia`` or ``gpu_amd``.
+Two examples are provided as guidance:
 
 .. code-block:: bash
 
-	module purge
-	module load anaconda-py3/2020.11  # python is fine as well if you can pip install the required modules
-	module load nvidia-compilers/23.1
-	module load cuda/11.2
-	module load openmpi/4.1.1-cuda
-	module load hdf5/1.12.0-mpi-cuda
-	# For HDF5, note that module show can give you the right path
-	export HDF5_ROOT_DIR=/DIRECTORY_NAME/hdf5/1.12.0/pgi-20.4-HASH/
+  make -j 12 machine="jean_zay_gpu_A100" config="gpu_nvidia" # example for Nvidia GPU
+  make -j 12 machine="adastra" config="gpu_amd"              # example for AMD GPU
 
-Note: 
+In these cases, the environment variables were included in *machine files* that
+you can find in ``scripts/compile_tools/machine/``.
+These files typically set ``CXXFLAGS += -ta=tesla:cc80`` for ``nvhpc`` <23.4 and
+``CXXFLAGS += -gpu=cc80 -acc`` for the more recent versions of ``nvhpc``.
 
-* we are aware of issues with CUDA >12.0, fixes are being tested but are not deployed yet. We recommend CUDA 11.x at the moment.
-* The hdf5 module should be compiled with the nvidia/cray compiler ; openmpi as well, but depending on the nvhpc module it might not be needed as it can be included in the nvhpc module 
+.. warning::
+  
+  * We are aware of issues with CUDA >12.0, fixes are being tested but are not deployed yet.
+    We recommend CUDA 11.x at the moment.
+  * The hdf5 module should be compiled with the nvidia/cray compiler;
+    openmpi as well, but depending on the nvhpc module it might not be needed as it can be included in the nvhpc module.
 
 ----
 
diff --git a/makefile b/makefile
index e71133f09..8ab1c925e 100755
--- a/makefile
+++ b/makefile
@@ -15,10 +15,10 @@
 
 BUILD_DIR ?= build
 SMILEICXX ?= mpicxx
-SMILEICXX.DEPS ?= mpicxx
 PYTHONEXE ?= python
 HDF5_ROOT_DIR ?= $(HDF5_ROOT)
 BOOST_ROOT_DIR ?= $(BOOST_ROOT)
+GPU_COMPILER ?= $(CC)
 TABLES_BUILD_DIR ?= tools/tables/build
 
 #-----------------------------------------------------
@@ -40,9 +40,9 @@ VERSION:=$(shell $(PYTHONEXE) scripts/compile_tools/get-version.py )
 COMPILER_INFO := $(shell $(SMILEICXX) -show | cut -d' ' -f1)
 
 ifeq ($(findstring g++, $(COMPILER_INFO)), g++)
-    CXXFLAGS += -Wno-reorder
+	CXXFLAGS += -Wno-reorder
 else ifeq ($(findstring clang++, $(COMPILER_INFO)), clang++)
-    CXXFLAGS += -Wdeprecated-register
+	CXXFLAGS += -Wdeprecated-register
 endif
 
 #-----------------------------------------------------
@@ -65,16 +65,16 @@ TABLES_SRCS := $(shell find tools/tables/* -name \*.cpp)
 #-----------------------------------------------------
 # check whether to use a machine specific definitions
 ifneq ($(machine),)
-    ifneq ($(wildcard scripts/compile_tools/machine/$(machine)),)
-    -include scripts/compile_tools/machine/$(machine)
-    else
+	ifneq ($(wildcard scripts/compile_tools/machine/$(machine)),)
+	-include scripts/compile_tools/machine/$(machine)
+	else
 define errormsg
 ERROR: Cannot find machine file for "$(machine)"
 Available machines are:
 $(shell ls -1 scripts/compile_tools/machine)
 endef
-    $(error $(errormsg))
-    endif
+	$(error $(errormsg))
+	endif
 endif
 
 #-----------------------------------------------------
@@ -86,17 +86,17 @@ CXXFLAGS += -D__VERSION=\"$(VERSION)\"
 CXXFLAGS += -DOMPI_SKIP_MPICXX
 # C++ version
 ifeq ($(findstring armclang++, $(COMPILER_INFO)), armclang++)
-    CXXFLAGS += -std=c++11 -Wall
+	CXXFLAGS += -std=c++11 -Wall
 else ifeq ($(findstring clang++, $(COMPILER_INFO)), clang++)
-    CXXFLAGS += -std=c++11 -Wall -Wno-unused-command-line-argument
+	CXXFLAGS += -std=c++11 -Wall -Wno-unused-command-line-argument
 else ifeq ($(findstring g++, $(COMPILER_INFO)), g++)
-    CXXFLAGS += -std=c++11 -Wall -Wextra
+	CXXFLAGS += -std=c++11 -Wall -Wextra
 else ifeq ($(findstring FCC, $(COMPILER_INFO)), FCC)
-    CXXFLAGS += -std=c++11
+	CXXFLAGS += -std=c++11
 else ifeq ($(findstring FCC, $(COMPILER_INFO)), FCCpx)
-    CXXFLAGS += -std=c++11
+	CXXFLAGS += -std=c++11
 else
-    CXXFLAGS += -std=c++14 #-Wall #not recognized by nvcc, make an exception
+	CXXFLAGS += -std=c++14 #-Wall #not recognized by nvcc, make an exception
 endif
 
 # HDF5 library
@@ -121,96 +121,122 @@ CXXFLAGS += $(PY_CXXFLAGS)
 PY_LDFLAGS := $(shell $(PYTHONCONFIG) --ldflags)
 LDFLAGS += $(PY_LDFLAGS)
 ifneq ($(strip $(PYTHONHOME)),)
-    LDFLAGS += -L$(PYTHONHOME)/lib
+	LDFLAGS += -L$(PYTHONHOME)/lib
 endif
 
 # Manage options in the "config" parameter
 ifneq (,$(call parse_config,debug))
-    CXXFLAGS += -g -pg -D__DEBUG -O0
+	CXXFLAGS += -g -pg -D__DEBUG -O0
 # With gdb
 else ifneq (,$(call parse_config,gdb))
-    CXXFLAGS += -g -D__DEBUG -O0
+	CXXFLAGS += -g -D__DEBUG -O0
 # With gdb
 else ifneq (,$(call parse_config,ddt))
-    # -g
-    CXXFLAGS += -O0 -g
+	# -g
+	CXXFLAGS += -O0 -g
 # With valgrind
 else ifneq (,$(call parse_config,valgrind))
-    CXXFLAGS += -g -O3
+	CXXFLAGS += -g -O3
 
 # Scalasca
 else ifneq (,$(call parse_config,scalasca))
-    CXXFLAGS += -g  -O3
-    SMILEICXX = scalasca -instrument $(SMILEICXX)
+	CXXFLAGS += -g  -O3
+	SMILEICXX = scalasca -instrument $(SMILEICXX)
 
 # With Intel Advisor / Vtune
 else ifneq (,$(call parse_config,advisor))
-    CXXFLAGS += -g -O3 -shared-intel -debug inline-debug-info -qopenmp-link dynamic -parallel-source-info=2
+	CXXFLAGS += -g -O3 -shared-intel -debug inline-debug-info -qopenmp-link dynamic -parallel-source-info=2
 
 # With Intel Inspector
 else ifneq (,$(call parse_config,inspector))
-    CXXFLAGS += -g -O0 -I$(INSPECTOR_ROOT_DIR)/include/
-    LDFLAGS += $(INSPECTOR_ROOT_DIR)/lib64/libittnotify.a
+	CXXFLAGS += -g -O0 -I$(INSPECTOR_ROOT_DIR)/include/
+	LDFLAGS += $(INSPECTOR_ROOT_DIR)/lib64/libittnotify.a
 
 # Default configuration
 else
-    ifeq ($(findstring clang++, $(COMPILER_INFO)), clang++)
-    	CXXFLAGS += -O3 -g -fno-math-errno
-    else ifeq ($(findstring armclang++, $(COMPILER_INFO)), armclang++)
-        CXXFLAGS += -Ofast -g
-    else ifeq ($(findstring FCC, $(COMPILER_INFO)), FCC)
-        CXXFLAGS += -O3 -Kfast -g
-    else ifeq ($(findstring FCCpx, $(COMPILER_INFO)), FCCpx)
-        CXXFLAGS += -O3 -Kfast -g
-    else ifeq ($(findstring pgi, $(COMPILER_INFO)), pgi)
-        CXXFLAGS += -O3
-    else
-        CXXFLAGS += -O3 -g
-    endif
+	ifeq ($(findstring clang++, $(COMPILER_INFO)), clang++)
+		CXXFLAGS += -O3 -g -fno-math-errno
+	else ifeq ($(findstring armclang++, $(COMPILER_INFO)), armclang++)
+		CXXFLAGS += -Ofast -g
+	else ifeq ($(findstring FCC, $(COMPILER_INFO)), FCC)
+		CXXFLAGS += -O3 -Kfast -g
+	else ifeq ($(findstring FCCpx, $(COMPILER_INFO)), FCCpx)
+		CXXFLAGS += -O3 -Kfast -g
+	else ifeq ($(findstring pgi, $(COMPILER_INFO)), pgi)
+		CXXFLAGS += -O3
+	else
+		CXXFLAGS += -O3 -g
+	endif
 endif
 
 # Optimization report
 ifneq (,$(call parse_config,opt-report))
-    # Clang compiler
-    ifeq ($(findstring clang++, $(COMPILER_INFO)), clang++)
-        CXXFLAGS += -fsave-optimization-record -Rpass-analysis=loop-vectorize
-    else ifeq ($(findstring armclang++, $(COMPILER_INFO)), armclang++)
-        CXXFLAGS += -fsave-optimization-record -Rpass-analysis=loop-vectorize
-    else ifeq ($(findstring FCC, $(COMPILER_INFO)), FCC)
-        CXXFLAGS += -Koptmsg=2 -Nlst=t
-    else ifeq ($(findstring FCCpx, $(COMPILER_INFO)), FCCpx)
-        CXXFLAGS += -Koptmsg=2 -Nlst=t
-    else ifeq ($(findstring g++, $(COMPILER_INFO)), g++)
-        CXXFLAGS += -fopt-info
-    # Intel compiler
-    else ifeq ($(findstring icpc, $(COMPILER_INFO)), icpc)
-        CXXFLAGS += -qopt-report5
-    endif
+	# Clang compiler
+	ifeq ($(findstring clang++, $(COMPILER_INFO)), clang++)
+		CXXFLAGS += -fsave-optimization-record -Rpass-analysis=loop-vectorize
+	else ifeq ($(findstring armclang++, $(COMPILER_INFO)), armclang++)
+		CXXFLAGS += -fsave-optimization-record -Rpass-analysis=loop-vectorize
+	else ifeq ($(findstring FCC, $(COMPILER_INFO)), FCC)
+		CXXFLAGS += -Koptmsg=2 -Nlst=t
+	else ifeq ($(findstring FCCpx, $(COMPILER_INFO)), FCCpx)
+		CXXFLAGS += -Koptmsg=2 -Nlst=t
+	else ifeq ($(findstring g++, $(COMPILER_INFO)), g++)
+		CXXFLAGS += -fopt-info
+	# Intel compiler
+	else ifeq ($(findstring icpc, $(COMPILER_INFO)), icpc)
+		CXXFLAGS += -qopt-report5
+	endif
 endif
 
 # Detailed timers
 ifneq (,$(call parse_config,detailed_timers))
-    CXXFLAGS += -D__DETAILED_TIMERS
+	CXXFLAGS += -D__DETAILED_TIMERS
+endif
+
+# NVIDIA GPUs: C++ sources get the OpenACC defines, .cu kernels are compiled by $(GPU_COMPILER)
+ifneq (,$(call parse_config,gpu_nvidia))
+	override config += noopenmp # Prevent openmp for nvidia
+	CXXFLAGS += -DSMILEI_ACCELERATOR_MODE -DSMILEI_OPENACC_MODE
+	# Keep a GPU_COMPILER chosen by the user or the machine file; only the generic $(CC) default becomes nvcc
+	GPU_COMPILER := $(if $(filter-out $(CC),$(GPU_COMPILER)),$(GPU_COMPILER),nvcc)
+	GPU_COMPILER_FLAGS += -x cu -DSMILEI_ACCELERATOR_MODE -DSMILEI_OPENACC_MODE $(DIRS:%=-I%) $(PY_CXXFLAGS)
+	# Every .cu file found under src/ gets an object in $(BUILD_DIR) that is added to the link list
+	GPU_KERNEL_SRCS := $(shell find src/* -name \*.cu)
+	GPU_KERNEL_OBJS := $(addprefix $(BUILD_DIR)/, $(GPU_KERNEL_SRCS:.cu=.o))
+	
+	OBJS += $(GPU_KERNEL_OBJS)
+endif
+
+# AMD GPUs: .cu kernels are compiled as HIP sources by $(GPU_COMPILER)
+ifneq (,$(call parse_config,gpu_amd))
+	CXXFLAGS += -DSMILEI_ACCELERATOR_MODE
+	GPU_COMPILER_FLAGS += -x hip -DSMILEI_ACCELERATOR_MODE $(DIRS:%=-I%) $(PY_CXXFLAGS)
+	# Appended (not assigned) above so flags from the environment or the machine file are kept
+	GPU_KERNEL_SRCS := $(shell find src/* -name \*.cu)
+	GPU_KERNEL_OBJS := $(addprefix $(BUILD_DIR)/, $(GPU_KERNEL_SRCS:.cu=.o))
+	
+	OBJS += $(GPU_KERNEL_OBJS)
+endif
 
 #activate openmp unless noopenmp flag
 # For Fujitsu compiler: -Kopenmp
 ifeq (,$(call parse_config,noopenmp))
-    ifeq ($(findstring FCC, $(COMPILER_INFO)), FCC)
-        OPENMP_FLAG ?= -Kopenmp -Kopenmp_simd
-    else ifeq ($(findstring FCCpx, $(COMPILER_INFO)), FCCpx)
-        OPENMP_FLAG ?= -Kopenmp -Kopenmp_simd
-    else
-    	OPENMP_FLAG ?= -fopenmp
-    endif
-    LDFLAGS += -lm
-    OPENMP_FLAG += -D_OMP
-    LDFLAGS += $(OPENMP_FLAG)
-    CXXFLAGS += $(OPENMP_FLAG)
+$(info OpenMP activated)
+	ifeq ($(findstring FCC, $(COMPILER_INFO)), FCC)
+		OPENMP_FLAG ?= -Kopenmp -Kopenmp_simd
+	else ifeq ($(findstring FCCpx, $(COMPILER_INFO)), FCCpx)
+		OPENMP_FLAG ?= -Kopenmp -Kopenmp_simd
+	else
+		OPENMP_FLAG ?= -fopenmp
+	endif
+	LDFLAGS += -lm
+	OPENMP_FLAG += -D_OMP
+	LDFLAGS += $(OPENMP_FLAG)
+	CXXFLAGS += $(OPENMP_FLAG)
 endif
 
 ifneq (,$(call parse_config,picsar))
-    # New environment variable
+	# New environment variable
 	FFTW3_LIB ?= $(FFTW_LIB_DIR)
 	LIBPXR ?= picsar/lib
 	# Set Picsar link environment
@@ -226,73 +252,31 @@ endif
 
 # Manage MPI communications by a single thread (master in MW)
 ifneq (,$(call parse_config,no_mpi_tm))
-    CXXFLAGS += -D_NO_MPI_TM
-endif
-
-# NVIDIA GPUs
-ifneq (,$(call parse_config,gpu_nvidia))
-
-    # To toggle between OpenACC/OpenMP support, see the jean_zay_gpu machinefile
-    # By default we provide the OpenACC version on JeanZay
-
-    # # Debugging mode
-    # ifneq (,$(call parse_config,debug))
-    #     ACCELERATOR_GPU_FLAGS += -w -g -D_GPU -Minfo=accel
-    #     ACCELERATOR_GPU_KERNEL_FLAGS += -O0 -G --std c++14 $(DIRS:%=-I%)
-    #     += $(shell $(PYTHONCONFIG) --includes)
-    # # DDT mode
-    # else ifneq (,$(call parse_config,ddt))
-    #     # -g
-    #     ACCELERATOR_GPU_FLAGS += -w -D_GPU -Minfo=accel
-	# # -cudart shared -G
-    #     ACCELERATOR_GPU_KERNEL_FLAGS += -O0 -G --std c++14 $(DIRS:%=-I%)
-    #     ACCELERATOR_GPU_KERNEL_FLAGS += $(shell $(PYTHONCONFIG) --includes)
-    # endif
-	CUDAHIP_FLAG = -x cu 
-	ACCELERATOR_GPU_FLAGS += -DSMILEI_ACCELERATOR_MODE
-        #ACCELERATOR_GPU_KERNEL_FLAGS += --define-macro SMILEI_ACCELERATOR_MODE
-        ACCELERATOR_GPU_KERNEL_FLAGS += -DSMILEI_ACCELERATOR_MODE
-
-    GPU_KERNEL_SRCS := $(shell find src/* -name \*.cu)
-    GPU_KERNEL_OBJS := $(addprefix $(BUILD_DIR)/, $(GPU_KERNEL_SRCS:.cu=.o))
-
-    OBJS += $(GPU_KERNEL_OBJS)
-endif
-
-# AMD GPUs
-ifneq (,$(call parse_config,gpu_amd))
-	CUDAHIP_FLAG = -x hip
-	ACCELERATOR_GPU_FLAGS += -DSMILEI_ACCELERATOR_MODE
-        ACCELERATOR_GPU_KERNEL_FLAGS += -DSMILEI_ACCELERATOR_MODE
-
-    GPU_KERNEL_SRCS := $(shell find src/* -name \*.cu)
-    GPU_KERNEL_OBJS := $(addprefix $(BUILD_DIR)/, $(GPU_KERNEL_SRCS:.cu=.o))
-
-	OBJS += $(GPU_KERNEL_OBJS)
+	CXXFLAGS += -D_NO_MPI_TM
 endif
 
 # Use OpenMP tasks
 ifneq (,$(call parse_config,omptasks))
-    CXXFLAGS += -D_OMPTASKS
+	CXXFLAGS += -D_OMPTASKS
 endif
 
 ifneq (,$(call parse_config,part_event_tracing_tasks_on))
-    CXXFLAGS += -D_OMPTASKS
-    CXXFLAGS += -D_PARTEVENTTRACING
+	CXXFLAGS += -D_OMPTASKS
+	CXXFLAGS += -D_PARTEVENTTRACING
 endif
 
 ifneq (,$(call parse_config,part_event_tracing_tasks_off))
-    CXXFLAGS += -D_PARTEVENTTRACING
+	CXXFLAGS += -D_PARTEVENTTRACING
 endif
 
-CXXFLAGS0 = $(shell echo $(CXXFLAGS)| sed "s/O3/O0/g" )
+CXXFLAGS0 = $(shell echo $(CXXFLAGS)| sed "s/O3/O0/g")
 
 #-----------------------------------------------------
 # Set the verbosity prefix
 ifeq (,$(call parse_config,verbose))
-    Q := @
+	Q := @
 else
-    Q :=
+	Q :=
 endif
 
 #last: check remaining arguments and raise error
@@ -300,6 +284,7 @@ ifneq ($(strip $(my_config)),)
 $(error "Unused parameters in config : $(my_config)")
 endif
 
+SMILEICXX.DEPS ?= $(SMILEICXX)
 
 #-----------------------------------------------------
 # Rules for building the excutable smilei
@@ -353,48 +338,35 @@ $(BUILD_DIR)/%.d: %.cpp
 	$(Q) if [ ! -d "$(@D)" ]; then mkdir -p "$(@D)"; fi;
 	$(Q) $(SMILEICXX.DEPS) $(CXXFLAGS) -MF"$@" -MM -MP -MT"$@ $(@:.d=.o)" $<
 
-# check if the special compilation below are actually needed
-
 # Calculate dependencies: special for Params.cpp which needs pyh files
 $(BUILD_DIR)/src/Params/Params.d: src/Params/Params.cpp $(PYHEADERS)
 	@echo "Checking dependencies for $<"
 	$(Q) if [ ! -d "$(@D)" ]; then mkdir -p "$(@D)"; fi;
-	$(Q) $(SMILEICXX) $(CXXFLAGS) -MF"$@" -MM -MP -MT"$@ $(@:.d=.o)" $<
+	$(Q) $(SMILEICXX.DEPS) $(CXXFLAGS) -MF"$@" -MM -MP -MT"$@ $(@:.d=.o)" $<
 
 ifeq ($(findstring icpc, $(COMPILER_INFO)), icpc)
-
 $(BUILD_DIR)/src/Diagnostic/DiagnosticScalar.o : src/Diagnostic/DiagnosticScalar.cpp
 	@echo "SPECIAL COMPILATION FOR $<"
-	$(Q) $(SMILEICXX) $(CXXFLAGS) $(ACCELERATOR_GPU_FLAGS) -O1 -c $< -o $@
+	$(Q) $(SMILEICXX) $(CXXFLAGS) -O1 -c $< -o $@
 endif
 
 $(BUILD_DIR)/src/MultiphotonBreitWheeler/MultiphotonBreitWheelerTablesDefault.o : src/MultiphotonBreitWheeler/MultiphotonBreitWheelerTablesDefault.cpp
 	@echo "SPECIAL COMPILATION FOR $<"
-	$(Q) $(SMILEICXX) $(CXXFLAGS0) $(ACCELERATOR_GPU_FLAGS) -c $< -o $@
+	$(Q) $(SMILEICXX) $(CXXFLAGS0) -c $< -o $@
 
 $(BUILD_DIR)/src/Radiation/RadiationTablesDefault.o : src/Radiation/RadiationTablesDefault.cpp
 	@echo "SPECIAL COMPILATION FOR $<"
-	$(Q) $(SMILEICXX) $(CXXFLAGS0) $(ACCELERATOR_GPU_FLAGS) -c $< -o $@
-
-# not needed for hip, should be the same for CUDA
-#$(BUILD_DIR)/src/Projector/Projector3D2OrderGPUKernel.o : src/Projector/Projector3D2OrderGPUKernel.cpp
-#	@echo "SPECIAL COMPILATION FOR $<"
-#	$(Q) $(THRUSTCXX) $(ACCELERATOR_GPU_KERNEL_FLAGS) $(CUDAHIP_FLAG) -c $< -o $@
-
-
-#$(BUILD_DIR)src/Radiation/RadiationNiel.o: src/Radiation/RadiationNiel.cpp
-#	@echo "SPECIAL COMPILATION FOR $<"
-#	$(Q) $(SMILEICXX) $(CXXFLAGS) -c $< -o $@
+	$(Q) $(SMILEICXX) $(CXXFLAGS0) -c $< -o $@
 
 # Compile cpps
 $(BUILD_DIR)/%.o : %.cpp
 	@echo "Compiling $<"
-	$(Q) $(SMILEICXX) $(CXXFLAGS) $(ACCELERATOR_GPU_FLAGS) -c $< -o $@
+	$(Q) $(SMILEICXX) $(CXXFLAGS) -c $< -o $@
 
 # Compile cus
 $(BUILD_DIR)/%.o : %.cu
 	@echo "Compiling $<"
-	$(Q) $(THRUSTCXX) $(ACCELERATOR_GPU_KERNEL_FLAGS) -c $< -o $@
+	$(Q) $(GPU_COMPILER) $(GPU_COMPILER_FLAGS) -c $< -o $@
 
 # Link the main program
 $(EXEC): $(OBJS)
@@ -405,7 +377,7 @@ $(EXEC): $(OBJS)
 # Compile the the main program again for test mode
 $(BUILD_DIR)/src/Smilei_test.o: src/Smilei.cpp $(EXEC)
 	@echo "Compiling src/Smilei.cpp for test mode"
-	$(Q) $(SMILEICXX) $(CXXFLAGS) $(ACCELERATOR_GPU_FLAGS) -DSMILEI_TESTMODE -c src/Smilei.cpp -o $@
+	$(Q) $(SMILEICXX) $(CXXFLAGS) -DSMILEI_TESTMODE -c src/Smilei.cpp -o $@
 
 # Link the main program for test mode
 $(EXEC)_test : $(OBJS:Smilei.o=Smilei_test.o)
@@ -420,7 +392,7 @@ PHONY_RULES=clean distclean help env debug doc tar happi uninstall_happi
 # Check dependencies only when necessary
 GOALS = $(if $(MAKECMDGOALS), $(MAKECMDGOALS), default)
 ifneq ($(filter-out $(PHONY_RULES) print-%, $(GOALS)),)
-    -include $(DEPS)
+	-include $(DEPS)
 endif
 
 #-----------------------------------------------------
@@ -515,29 +487,29 @@ help:
 	@echo '----------------'
 	@echo 'Usage:'
 	@echo '  make'
-	@echo 'or, to compile with 4 cpus (for instance):'
+	@echo 'or, to compile with 4 threads (for instance):'
 	@echo '  make -j 4'
 	@echo
 	@echo 'Config options:'
 	@echo '  make config="[ verbose ] [ debug ] [ scalasca ] [ noopenmp ]"'
 	@echo '    verbose                      : to print compile command lines'
-	@echo '    debug                        : to compile in debug mode (code runs really slow)'
-	@echo '    detailed_timers              : to compile the code with more refined timers (refined time report)'
 	@echo '    noopenmp                     : to compile without openmp'
 	@echo '    no_mpi_tm                    : to compile with a MPI library without MPI_THREAD_MULTIPLE support'
+	@echo '    gpu_nvidia                   : to compile for NVIDIA GPU (uses OpenACC)'
+	@echo '    gpu_amd                      : to compile for AMD GPU (uses OpenMP)'
+	@echo '    detailed_timers              : to compile the code with more refined timers (refined time report)'
+	@echo '    debug                        : to compile in debug mode (code runs really slow)'
 	@echo '    opt-report                   : to generate a report about optimization, vectorization and inlining (Intel compiler)'
 	@echo '    scalasca                     : to compile using scalasca'
 	@echo '    advisor                      : to compile for Intel Advisor analysis'
 	@echo '    vtune                        : to compile for Intel Vtune analysis'
 	@echo '    inspector                    : to compile for Intel Inspector analysis'
-	@echo '    gpu_nvidia                   : to compile for GPU (uses OpenACC)'
-	@echo '    gpu_amd                      : to compile for GPU (uses OpenMP)'
-#	@echo '    omptasks                     : to compile with OpenMP tasks'
-#	@echo '    part_event_tracing_tasks_on  : to compile particle event tracing and OpenMP tasks'
-#	@echo '    part_event_tracing_tasks_off : to compile particle event tracing without OpenMP tasks'
-#	@echo '    omptasks                     : to compile with OpenMP tasks'
-#	@echo '    part_event_tracing_tasks_on  : to compile particle event tracing and OpenMP tasks'
-#	@echo '    part_event_tracing_tasks_off : to compile particle event tracing without OpenMP tasks'
+#    @echo '    omptasks                     : to compile with OpenMP tasks'
+#    @echo '    part_event_tracing_tasks_on  : to compile particle event tracing and OpenMP tasks'
+#    @echo '    part_event_tracing_tasks_off : to compile particle event tracing without OpenMP tasks'
+#    @echo '    omptasks                     : to compile with OpenMP tasks'
+#    @echo '    part_event_tracing_tasks_on  : to compile particle event tracing and OpenMP tasks'
+#    @echo '    part_event_tracing_tasks_off : to compile particle event tracing without OpenMP tasks'
 	@echo
 	@echo 'Examples:'
 	@echo '  make config=verbose'
@@ -548,6 +520,22 @@ help:
 	@echo '  make machine=XXX      : include machine file in scripts/compile_tools/machine/XXX'
 	@echo '  make machine=XXX help : print help for machine'
 	@echo
+	@echo 'Environment variables needed for compilation:'
+	@echo '  SMILEICXX         : mpi c++ compiler (possibly GPU-aware) [mpicxx]'
+	@echo '  SMILEICXX.DEPS    : c++ compiler for calculating dependencies [$$SMILEICXX]'
+	@echo '  CXXFLAGS          : FLAGS for $$SMILEICXX []'
+	@echo '  LDFLAGS           : FLAGS for the linker []'
+	@echo '  HDF5_ROOT_DIR     : folder where the HDF5 library was installed [$$HDF5_ROOT]'
+	@echo '  BUILD_DIR         : custom folder for building Smilei [build]'
+	@echo '  PYTHONEXE         : python executable [python]'
+	@echo '  GPU_COMPILER      : compiler for cuda-like files [$$CC]'
+	@echo '  GPU_COMPILER_FLAGS: flags for the $$GPU_COMPILER []'
+#    @echo '  FFTW3_LIB_DIR  : FFTW3 libraries directory [$(FFTW3_LIB_DIR)]'
+#    @echo '  LIBPXR         : Picsar library directory [$(LIBPXR)]'
+	@echo
+	@echo 'Intel Inspector environment:'
+	@echo '  INSPECTOR_ROOT_DIR    : only needed to use the inspector API (__itt functions) [$(INSPECTOR_ROOT_DIR)]'
+	@echo
 	@echo 'OTHER PURPOSES:'
 	@echo '---------------'
 	@echo '  make doc              : builds the documentation'
@@ -561,19 +549,7 @@ help:
 	@echo 'SMILEI TABLES:'
 	@echo '---------------'
 	@echo '  make tables           : compilation of the tool smilei_tables'
-	@echo ''
-	@echo 'Environment variables:'
-	@echo '  SMILEICXX             : mpi c++ compiler [$(SMILEICXX)]'
-	@echo '  HDF5_ROOT_DIR         : HDF5 dir. Defaults to the value of HDF5_ROOT [$(HDF5_ROOT_DIR)]'
-	@echo '  BUILD_DIR             : directory used to store build files [$(BUILD_DIR)]'
-	@echo '  OPENMP_FLAG           : openmp flag [$(OPENMP_FLAG)]'
-	@echo '  PYTHONEXE             : python executable [$(PYTHONEXE)]'
-	@echo '  FFTW3_LIB_DIR         : FFTW3 libraries directory [$(FFTW3_LIB_DIR)]'
-	@echo '  LIBPXR                : Picsar library directory [$(LIBPXR)]'
-	@echo
-	@echo 'Intel Inspector environment:'
-	@echo '  INSPECTOR_ROOT_DIR    : only needed to use the inspector API (__itt functions) [$(INSPECTOR_ROOT_DIR)]'
-	@echo
+	@echo 
 	@echo 'https://smileipic.github.io/Smilei/'
 	@echo 'https://github.com/SmileiPIC/Smilei'
 	@echo
diff --git a/scripts/compile_tools/machine/adastra b/scripts/compile_tools/machine/adastra
index 58ba42bd1..77e86c6d9 100644
--- a/scripts/compile_tools/machine/adastra
+++ b/scripts/compile_tools/machine/adastra
@@ -37,8 +37,8 @@
 
 SMILEICXX := CC
 SMILEICXX.DEPS := $(SMILEICXX)
-# THRUSTCXX := hipcc
-THRUSTCXX := $(SMILEICXX)
+# GPU_COMPILER := hipcc
+GPU_COMPILER := $(SMILEICXX)
 
 ################################################################################
 # Feature flags
@@ -79,15 +79,6 @@ ADASTRA_OPTIMIZATION_FLAGS       := -O3 $(ADASTRA_OPTIMIZATION_LTO_FLAGS) $(ADAS
 # -g and -ggdb have no know runtime overhead expect for program size
 ADASTRA_DEBUG_FLAGS           := -g -ggdb $(ADASTRA_DEBUG_SANITIZER_FLAGS) -v # --cray-print-opts=all -craype-verbose
 
-################################################################################
-# Omp flags
-################################################################################
-
-# No need to be specific with something like:
-# -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx90a
-# The craype-accel-amd-gfx90a module does it for us.
-ADASTRA_OMP_FLAGS := -fopenmp
-
 ################################################################################
 # OMP GPU offloading support flags
 ################################################################################
@@ -104,12 +95,12 @@ ifneq (,$(call parse_config,gpu_amd))
     # When using hip/cuda to build kernels
     # When you ask CCE to drive the hip compilation
     # NOTE: the -x hip flag MUST come before the .cu files !
-    ADASTRA_ACCELERATOR_GPU_KERNEL_FLAGS := -x hip
-    # ADASTRA_ACCELERATOR_GPU_KERNEL_FLAGS += --offload-arch=$(ADASTRA_ACCELERATOR_GPU_TARGET)
+    ADASTRA_GPU_COMPILER_FLAGS := -x hip
+    # ADASTRA_GPU_COMPILER_FLAGS += --offload-arch=$(ADASTRA_ACCELERATOR_GPU_TARGET)
 
     # Diagnostics
-    # ADASTRA_ACCELERATOR_GPU_KERNEL_FLAGS += --save-temps # Cuda/HIP kernel info. To use with the AMD compiler (the Cray compiler don't produce interesting details, only the Clang LLVM IR)
-    # ADASTRA_ACCELERATOR_GPU_KERNEL_FLAGS += -Rpass-analysis=kernel-resource-usage
+    # ADASTRA_GPU_COMPILER_FLAGS += --save-temps # Cuda/HIP kernel info. To use with the AMD compiler (the Cray compiler don't produce interesting details, only the Clang LLVM IR)
+    # ADASTRA_GPU_COMPILER_FLAGS += -Rpass-analysis=kernel-resource-usage
 endif
 
 ################################################################################
@@ -156,17 +147,16 @@ ADASTRA_LIBRARIES := $(ADASTRA_LIBRARIES_OMP_STATIC) $(ADASTRA_LIBRARIES_MPI) $(
 ################################################################################
 
 # Inherit of all the typical *.cpp compilation options
-# ACCELERATOR_GPU_KERNEL_FLAGS := $(CXXFLAGS) # This command is soo weird, it doesn't work as expected
-ACCELERATOR_GPU_KERNEL_FLAGS += $(shell $(PYTHONCONFIG) --includes) $(DIRS:%=-I%)
-ACCELERATOR_GPU_KERNEL_FLAGS += $(ADASTRA_FEATURE_FLAGS)
-ACCELERATOR_GPU_KERNEL_FLAGS += $(ADASTRA_WARNING_FLAGS)
-ACCELERATOR_GPU_KERNEL_FLAGS += $(ADASTRA_OPTIMIZATION_FLAGS)
-ACCELERATOR_GPU_KERNEL_FLAGS += $(ADASTRA_DEBUG_FLAGS)
-ACCELERATOR_GPU_KERNEL_FLAGS += $(ADASTRA_OMP_FLAGS)
-ACCELERATOR_GPU_KERNEL_FLAGS += $(ADASTRA_ACCELERATOR_GPU_OMP_DEFINE_FLAGS)
-# ACCELERATOR_GPU_KERNEL_FLAGS += $(ADASTRA_ACCELERATOR_GPU_OMP_FLAGS) # Disabled due to unsupported option '-fopenmp-targets=' for language mode 'HIP'
-ACCELERATOR_GPU_KERNEL_FLAGS += $(ADASTRA_ACCELERATOR_GPU_KERNEL_FLAGS)
-ACCELERATOR_GPU_KERNEL_FLAGS += $(ADASTRA_INCLUDES)
+# GPU_COMPILER_FLAGS := $(CXXFLAGS) # This command is soo weird, it doesn't work as expected
+GPU_COMPILER_FLAGS += $(ADASTRA_FEATURE_FLAGS)
+GPU_COMPILER_FLAGS += $(ADASTRA_WARNING_FLAGS)
+GPU_COMPILER_FLAGS += $(ADASTRA_OPTIMIZATION_FLAGS)
+GPU_COMPILER_FLAGS += $(ADASTRA_DEBUG_FLAGS)
+GPU_COMPILER_FLAGS += $(ADASTRA_OMP_FLAGS)
+GPU_COMPILER_FLAGS += $(ADASTRA_ACCELERATOR_GPU_OMP_DEFINE_FLAGS)
+# GPU_COMPILER_FLAGS += $(ADASTRA_ACCELERATOR_GPU_OMP_FLAGS) # Disabled due to unsupported option '-fopenmp-targets=' for language mode 'HIP'
+GPU_COMPILER_FLAGS += $(ADASTRA_GPU_COMPILER_FLAGS)
+GPU_COMPILER_FLAGS += $(ADASTRA_INCLUDES)
 
 CXXFLAGS += $(ADASTRA_FEATURE_FLAGS)
 CXXFLAGS += $(ADASTRA_WARNING_FLAGS)
diff --git a/scripts/compile_tools/machine/jean_zay_gpu b/scripts/compile_tools/machine/jean_zay_gpu
index 83b2b0078..d088ae26f 100644
--- a/scripts/compile_tools/machine/jean_zay_gpu
+++ b/scripts/compile_tools/machine/jean_zay_gpu
@@ -7,28 +7,22 @@
 #
 
 SMILEICXX.DEPS = g++
-THRUSTCXX = nvcc
+GPU_COMPILER = nvcc
 
-ACCELERATOR_GPU_FLAGS += -w
+CXXFLAGS += -w
 # IDRIS config for curand
-ACCELERATOR_GPU_FLAGS += -ta=tesla:cc70 -std=c++14  -lcurand 
-# ACCELERATOR_GPU_FLAGS += --expt-relaxed-constexpr
+CXXFLAGS += -ta=tesla:cc70 -std=c++14 -lcurand
+# CXXFLAGS += --expt-relaxed-constexpr
+CXXFLAGS += -Minfo=accel # what is offloaded/copied
+# CXXFLAGS += -Minfo=all   # very verbose output
 
-ACCELERATOR_GPU_KERNEL_FLAGS += -O3 --std c++14 $(DIRS:%=-I%)
-ACCELERATOR_GPU_KERNEL_FLAGS += --expt-relaxed-constexpr
-ACCELERATOR_GPU_KERNEL_FLAGS += $(shell $(PYTHONCONFIG) --includes)
-ACCELERATOR_GPU_KERNEL_FLAGS += -arch=sm_70
-ACCELERATOR_GPU_FLAGS        += -Minfo=accel # what is offloaded/copied
-# ACCELERATOR_GPU_FLAGS        += -Minfo=all   # very verbose output
-
-# OpenACC support
-ACCELERATOR_GPU_FLAGS += -DSMILEI_OPENACC_MODE
-ACCELERATOR_GPU_KERNEL_FLAGS += -DSMILEI_OPENACC_MODE
+GPU_COMPILER_FLAGS += -O3 --std c++14 -arch=sm_70
+GPU_COMPILER_FLAGS += --expt-relaxed-constexpr
 
 # To turn on the OpenMP support, uncomment these 3 lines and comment the line just above defining 'SMILEI_OPENACC_MODE'
-# ACCELERATOR_GPU_FLAGS        += -mp=gpu -DSMILEI_ACCELERATOR_GPU_OMP
-# ACCELERATOR_GPU_KERNEL_FLAGS +=         -DSMILEI_ACCELERATOR_GPU_OMP # Can't we pass the -mp=gpu to nvcc when compiling a .cu file ?
-# LDFLAGS                      += -mp=gpu
+# CXXFLAGS += -mp=gpu -DSMILEI_ACCELERATOR_GPU_OMP
+# GPU_COMPILER_FLAGS += -DSMILEI_ACCELERATOR_GPU_OMP # Can't we pass the -mp=gpu to nvcc when compiling a .cu file ?
+# LDFLAGS += -mp=gpu
 
 LDFLAGS += -ta=tesla:cc70 -std=c++14 -Mcudalib=curand -lcudart -lcurand -lacccuda
-CXXFLAGS +=  -D__GCC_ATOMIC_TEST_AND_SET_TRUEVAL=1
+CXXFLAGS += -D__GCC_ATOMIC_TEST_AND_SET_TRUEVAL=1
diff --git a/scripts/compile_tools/machine/jean_zay_gpu_A100 b/scripts/compile_tools/machine/jean_zay_gpu_A100
index 16c86bc90..de5cc32ec 100644
--- a/scripts/compile_tools/machine/jean_zay_gpu_A100
+++ b/scripts/compile_tools/machine/jean_zay_gpu_A100
@@ -7,28 +7,22 @@
 #
 
 SMILEICXX.DEPS = nvcc
-THRUSTCXX = nvcc
+GPU_COMPILER = nvcc
 
-ACCELERATOR_GPU_FLAGS += -w
+CXXFLAGS += -w
 # IDRIS config for curand
-ACCELERATOR_GPU_FLAGS += -tp=zen3 -ta=tesla:cc80 -std=c++14  -lcurand -Mcudalib=curand
-# ACCELERATOR_GPU_FLAGS += --expt-relaxed-constexpr
+CXXFLAGS += -tp=zen3 -ta=tesla:cc80 -std=c++14  -lcurand -Mcudalib=curand
+# CXXFLAGS += --expt-relaxed-constexpr
+CXXFLAGS += -Minfo=accel # what is offloaded/copied
+# CXXFLAGS += -Minfo=all   # very verbose output
 
-ACCELERATOR_GPU_KERNEL_FLAGS += -O3 --std c++14 $(DIRS:%=-I%)
-ACCELERATOR_GPU_KERNEL_FLAGS += --expt-relaxed-constexpr
-ACCELERATOR_GPU_KERNEL_FLAGS += $(shell $(PYTHONCONFIG) --includes)
-ACCELERATOR_GPU_KERNEL_FLAGS += -arch=sm_80
-ACCELERATOR_GPU_FLAGS        += -Minfo=accel # what is offloaded/copied
-# ACCELERATOR_GPU_FLAGS        += -Minfo=all   # very verbose output
-
-# OpenACC support
-ACCELERATOR_GPU_FLAGS += -DSMILEI_OPENACC_MODE
-ACCELERATOR_GPU_KERNEL_FLAGS += -DSMILEI_OPENACC_MODE
+GPU_COMPILER_FLAGS += -O3 --std c++14 -arch=sm_80
+GPU_COMPILER_FLAGS += --expt-relaxed-constexpr
 
 # To turn on the OpenMP support, uncomment these 3 lines and comment the line just above defining 'SMILEI_OPENACC_MODE'
-# ACCELERATOR_GPU_FLAGS        += -mp=gpu -DSMILEI_ACCELERATOR_GPU_OMP
-# ACCELERATOR_GPU_KERNEL_FLAGS +=         -DSMILEI_ACCELERATOR_GPU_OMP # Can't we pass the -mp=gpu to nvcc when compiling a .cu file ?
-# LDFLAGS                      += -mp=gpu
+# CXXFLAGS += -mp=gpu -DSMILEI_ACCELERATOR_GPU_OMP
+# GPU_COMPILER_FLAGS += -DSMILEI_ACCELERATOR_GPU_OMP # Can't we pass the -mp=gpu to nvcc when compiling a .cu file ?
+# LDFLAGS += -mp=gpu
 
 LDFLAGS += -ta=tesla:cc80 -std=c++14 -Mcudalib=curand -lcudart -lcurand -lacccuda -L/gpfslocalsys/cuda/11.2/lib64/
-CXXFLAGS +=  -D__GCC_ATOMIC_TEST_AND_SET_TRUEVAL=1
+CXXFLAGS += -D__GCC_ATOMIC_TEST_AND_SET_TRUEVAL=1
diff --git a/scripts/compile_tools/machine/jean_zay_gpu_nvcc b/scripts/compile_tools/machine/jean_zay_gpu_nvcc
index 19721298c..b609cf995 100644
--- a/scripts/compile_tools/machine/jean_zay_gpu_nvcc
+++ b/scripts/compile_tools/machine/jean_zay_gpu_nvcc
@@ -7,28 +7,22 @@
 #
 
 SMILEICXX.DEPS = nvcc
-THRUSTCXX = nvcc
+GPU_COMPILER = nvcc
 
-ACCELERATOR_GPU_FLAGS += -w
+CXXFLAGS += -w
 # IDRIS config for curand
-ACCELERATOR_GPU_FLAGS += -ta=tesla:cc70 -std=c++14  -lcurand -Mcudalib=curand
-# ACCELERATOR_GPU_FLAGS += --expt-relaxed-constexpr
+CXXFLAGS += -ta=tesla:cc70 -std=c++14  -lcurand -Mcudalib=curand
+# CXXFLAGS += --expt-relaxed-constexpr
+CXXFLAGS += -Minfo=accel # what is offloaded/copied
+# CXXFLAGS += -Minfo=all   # very verbose output
 
-ACCELERATOR_GPU_KERNEL_FLAGS += -O3 --std c++14 $(DIRS:%=-I%)
-ACCELERATOR_GPU_KERNEL_FLAGS += --expt-relaxed-constexpr
-ACCELERATOR_GPU_KERNEL_FLAGS += $(shell $(PYTHONCONFIG) --includes)
-ACCELERATOR_GPU_KERNEL_FLAGS += -arch=sm_70
-ACCELERATOR_GPU_FLAGS        += -Minfo=accel # what is offloaded/copied
-# ACCELERATOR_GPU_FLAGS        += -Minfo=all   # very verbose output
-
-# OpenACC support
-ACCELERATOR_GPU_FLAGS += -DSMILEI_OPENACC_MODE
-ACCELERATOR_GPU_KERNEL_FLAGS += -DSMILEI_OPENACC_MODE
+GPU_COMPILER_FLAGS += -O3 --std c++14 -arch=sm_70
+GPU_COMPILER_FLAGS += --expt-relaxed-constexpr
 
 # To turn on the OpenMP support, uncomment these 3 lines and comment the line just above defining 'SMILEI_OPENACC_MODE'
-# ACCELERATOR_GPU_FLAGS        += -mp=gpu -DSMILEI_ACCELERATOR_GPU_OMP
-# ACCELERATOR_GPU_KERNEL_FLAGS +=         -DSMILEI_ACCELERATOR_GPU_OMP # Can't we pass the -mp=gpu to nvcc when compiling a .cu file ?
-# LDFLAGS                      += -mp=gpu
+# CXXFLAGS += -mp=gpu -DSMILEI_ACCELERATOR_GPU_OMP
+# GPU_COMPILER_FLAGS += -DSMILEI_ACCELERATOR_GPU_OMP # Can't we pass the -mp=gpu to nvcc when compiling a .cu file ?
+# LDFLAGS += -mp=gpu
 
 LDFLAGS += -ta=tesla:cc70 -std=c++14 -Mcudalib=curand -lcudart -lcurand -lacccuda -L/gpfslocalsys/cuda/11.2/lib64/
-CXXFLAGS +=  -D__GCC_ATOMIC_TEST_AND_SET_TRUEVAL=1
+CXXFLAGS += -D__GCC_ATOMIC_TEST_AND_SET_TRUEVAL=1
diff --git a/scripts/compile_tools/machine/juanjo_cpu b/scripts/compile_tools/machine/juanjo_cpu
index bf3e6f875..17cae34bf 100644
--- a/scripts/compile_tools/machine/juanjo_cpu
+++ b/scripts/compile_tools/machine/juanjo_cpu
@@ -1,6 +1,6 @@
-ACCELERATOR_GPU_FLAGS = -acc -Mcudalib=curand -ta=tesla:cc35 -Minfo=accel
+CXXFLAGS += -acc -Mcudalib=curand -ta=tesla:cc35 -Minfo=accel
 LDFLAGS += -ta=tesla:cc35 -L/usr/local/openmpi/lib/ -L/opt/nvidia/hpc_sdk/Linux_x86_64/21.2/cuda/11.2/lib64/ -lcudart 
-ACCELERATOR_GPU_KERNEL_FLAGS += -arch=sm_35 -I/usr/local/openmpi/include/ -I/opt/nvidia/hpc_sdk/Linux_x86_64/21.2/cuda/11.2/include/ 
+GPU_COMPILER_FLAGS += -arch=sm_35 -I/usr/local/openmpi/include/ -I/opt/nvidia/hpc_sdk/Linux_x86_64/21.2/cuda/11.2/include/ 
 CXXFLAGS += -I/usr/local/hdf5/include/ -I/usr/local/openmpi/include/
 
 
diff --git a/scripts/compile_tools/machine/llracp b/scripts/compile_tools/machine/llracp
index 68db0fb63..4384934a5 100644
--- a/scripts/compile_tools/machine/llracp
+++ b/scripts/compile_tools/machine/llracp
@@ -15,8 +15,8 @@
 # module load cuda/10.1
 # export MANPATH=/usr/local/cuda-10.1/doc/man:$MANPATH
 
-ACCELERATOR_GPU_FLAGS += -ta=tesla:cc70
+CXXFLAGS += -ta=tesla:cc70
 LDFLAGS  += -ta=tesla:cc70 -L/usr/local/cuda-10.1/lib64 -lcudart
 
-ACCELERATOR_GPU_KERNEL_FLAGS += -arch=sm_70 -I/home/llr/galop/derouil/applications/pgi-19.10_mpi/linux86-64-llvm/2019/mpi/openmpi-3.1.3/include
+GPU_COMPILER_FLAGS += -arch=sm_70 -I/home/llr/galop/derouil/applications/pgi-19.10_mpi/linux86-64-llvm/2019/mpi/openmpi-3.1.3/include
 
diff --git a/scripts/compile_tools/machine/ruche_gpu b/scripts/compile_tools/machine/ruche_gpu
index 6c8a2aca4..35890fb9d 100644
--- a/scripts/compile_tools/machine/ruche_gpu
+++ b/scripts/compile_tools/machine/ruche_gpu
@@ -17,8 +17,8 @@
 # For HDF5 (Intel compiled) compatibility in this PGI environment
 LDFLAGS  += -L/gpfs/softs/spack/opt/spack/linux-centos7-haswell/gcc-4.8.5/intel-19.0.3-k6ro4ofsaw2mzxakwhkygta3ihg5e4aj/lib/intel64 -lintlc -lsvml -lz
 
-ACCELERATOR_GPU_FLAGS += -ta=tesla:cc70
+CXXFLAGS += -ta=tesla:cc70
 LDFLAGS  += -ta=tesla:cc70 -L/gpfs/softs/spack/opt/spack/linux-centos7-haswell/gcc-4.8.5/pgi-20.1-5qavttbng2gxzgsoq6jujyocczsvlc2o/linux86-64-llvm/2020/cuda/10.1/lib64 -lcudart
 
-ACCELERATOR_GPU_KERNEL_FLAGS += -arch=sm_70
+GPU_COMPILER_FLAGS += -arch=sm_70
 
diff --git a/scripts/compile_tools/machine/ruche_gpu2 b/scripts/compile_tools/machine/ruche_gpu2
index 6c5b05682..afc9e7921 100644
--- a/scripts/compile_tools/machine/ruche_gpu2
+++ b/scripts/compile_tools/machine/ruche_gpu2
@@ -9,31 +9,26 @@
 
 SMILEICXX.DEPS = nvcc -I/gpfs/softs/spack_0.17/opt/spack/linux-centos7-haswell/nvhpc-23.7/openmpi-4.1.5-ckfuippq6gf6qsilwitd7d2zyd5bng32/include/ -I/gpfs/softs/spack_0.17/opt/spack/linux-centos7-haswell/nvhpc-23.7/hdf5-1.12.0-3em63nl4p5tmv37offfmuvz2uswvgwzv/include/
 
-THRUSTCXX = nvcc -I/gpfs/softs/spack_0.17/opt/spack/linux-centos7-haswell/nvhpc-23.7/openmpi-4.1.5-ckfuippq6gf6qsilwitd7d2zyd5bng32/include/ -I/gpfs/softs/spack_0.17/opt/spack/linux-centos7-haswell/nvhpc-23.7/hdf5-1.12.0-3em63nl4p5tmv37offfmuvz2uswvgwzv/include/
+GPU_COMPILER = nvcc -I/gpfs/softs/spack_0.17/opt/spack/linux-centos7-haswell/nvhpc-23.7/openmpi-4.1.5-ckfuippq6gf6qsilwitd7d2zyd5bng32/include/ -I/gpfs/softs/spack_0.17/opt/spack/linux-centos7-haswell/nvhpc-23.7/hdf5-1.12.0-3em63nl4p5tmv37offfmuvz2uswvgwzv/include/
 
-ACCELERATOR_GPU_FLAGS += -w
+CXXFLAGS += -w
 # IDRIS config for curand
-ACCELERATOR_GPU_FLAGS += -gpu=cc70,cc80 -acc -std=c++14  -lcurand #-ta=tesla:cc70 -std=c++14  -lcurand -cudalib=curand # do not put -cuda here
-# ACCELERATOR_GPU_FLAGS += --expt-relaxed-constexpr
+CXXFLAGS += -gpu=cc70,cc80 -acc -std=c++14  -lcurand #-ta=tesla:cc70 -std=c++14  -lcurand -cudalib=curand # do not put -cuda here
+# CXXFLAGS += --expt-relaxed-constexpr
 
 ACCELERATOR_CUDA_FLAGS += -w -gpu=cc70,cc80 -cuda -std=c++14  -lcurand #not used at the moment
 
 
-ACCELERATOR_GPU_KERNEL_FLAGS += -O3 --std c++14 $(DIRS:%=-I%) 
+GPU_COMPILER_FLAGS += -O3 --std c++14
 
-ACCELERATOR_GPU_KERNEL_FLAGS += --expt-relaxed-constexpr
-ACCELERATOR_GPU_KERNEL_FLAGS += $(shell $(PYTHONCONFIG) --includes)
-ACCELERATOR_GPU_KERNEL_FLAGS += -arch=sm_80 #sm_89 # first compile completely with sm_80 then rm build/src/Projector/Projector3D2OrderGPUKernelCUDAHIP.o then compile again with sm_89 (no make clean !) #adapt for 2D  
-ACCELERATOR_GPU_FLAGS        += -Minfo=accel # what is offloaded/copied
-# ACCELERATOR_GPU_FLAGS        += -Minfo=all   # very verbose output
-
-# OpenACC support
-ACCELERATOR_GPU_FLAGS += -DSMILEI_OPENACC_MODE
-ACCELERATOR_GPU_KERNEL_FLAGS += -DSMILEI_OPENACC_MODE
+GPU_COMPILER_FLAGS += --expt-relaxed-constexpr
+GPU_COMPILER_FLAGS += -arch=sm_80 #sm_89 # first compile completely with sm_80 then rm build/src/Projector/Projector3D2OrderGPUKernelCUDAHIP.o then compile again with sm_89 (no make clean !) #adapt for 2D  
+CXXFLAGS        += -Minfo=accel # what is offloaded/copied
+# CXXFLAGS        += -Minfo=all   # very verbose output
 
 # To turn on the OpenMP support, uncomment these 3 lines and comment the line just above defining 'SMILEI_OPENACC_MODE'
-# ACCELERATOR_GPU_FLAGS        += -mp=gpu -DSMILEI_ACCELERATOR_GPU_OMP
-# ACCELERATOR_GPU_KERNEL_FLAGS +=         -DSMILEI_ACCELERATOR_GPU_OMP # Can't we pass the -mp=gpu to nvcc when compiling a .cu file ?
+# CXXFLAGS        += -mp=gpu -DSMILEI_ACCELERATOR_GPU_OMP
+# GPU_COMPILER_FLAGS +=         -DSMILEI_ACCELERATOR_GPU_OMP # Can't we pass the -mp=gpu to nvcc when compiling a .cu file ?
 # LDFLAGS                      += -mp=gpu
 
 LDFLAGS +=  -L/gpfs/softs/spack_0.17/opt/spack/linux-centos7-cascadelake/gcc-11.2.0/gettext-0.21-bppg5g6ijfrvi7sdylhhg3t5f6v2fh2x/lib/

From 8c3a0f2f6fc7afd5db84f79d34c5fa93eb5b354e Mon Sep 17 00:00:00 2001
From: "charles.prouveur" <charles.prouveur@gmail.com>
Date: Tue, 23 Jan 2024 11:50:16 +0100
Subject: [PATCH 2/6] compiler used for deps is g++, removed useless machine
 file

---
 .../compile_tools/machine/jean_zay_gpu_A100   |  2 +-
 .../{jean_zay_gpu => jean_zay_gpu_V100}       |  0
 .../compile_tools/machine/jean_zay_gpu_nvcc   | 28 -------------------
 scripts/compile_tools/machine/ruche_gpu2      |  2 +-
 4 files changed, 2 insertions(+), 30 deletions(-)
 rename scripts/compile_tools/machine/{jean_zay_gpu => jean_zay_gpu_V100} (100%)
 delete mode 100644 scripts/compile_tools/machine/jean_zay_gpu_nvcc

diff --git a/scripts/compile_tools/machine/jean_zay_gpu_A100 b/scripts/compile_tools/machine/jean_zay_gpu_A100
index de5cc32ec..268b31acd 100644
--- a/scripts/compile_tools/machine/jean_zay_gpu_A100
+++ b/scripts/compile_tools/machine/jean_zay_gpu_A100
@@ -6,7 +6,7 @@
 # http://www.idris.fr/jean-zay
 #
 
-SMILEICXX.DEPS = nvcc
+SMILEICXX.DEPS = g++
 GPU_COMPILER = nvcc
 
 CXXFLAGS += -w
diff --git a/scripts/compile_tools/machine/jean_zay_gpu b/scripts/compile_tools/machine/jean_zay_gpu_V100
similarity index 100%
rename from scripts/compile_tools/machine/jean_zay_gpu
rename to scripts/compile_tools/machine/jean_zay_gpu_V100
diff --git a/scripts/compile_tools/machine/jean_zay_gpu_nvcc b/scripts/compile_tools/machine/jean_zay_gpu_nvcc
deleted file mode 100644
index b609cf995..000000000
--- a/scripts/compile_tools/machine/jean_zay_gpu_nvcc
+++ /dev/null
@@ -1,28 +0,0 @@
-#
-# Machine file for Jean Zay at IDRIS
-# __________________________________________________________
-#
-# Documentation:
-# http://www.idris.fr/jean-zay
-#
-
-SMILEICXX.DEPS = nvcc
-GPU_COMPILER = nvcc
-
-CXXFLAGS += -w
-# IDRIS config for curand
-CXXFLAGS += -ta=tesla:cc70 -std=c++14  -lcurand -Mcudalib=curand
-# CXXFLAGS += --expt-relaxed-constexpr
-CXXFLAGS += -Minfo=accel # what is offloaded/copied
-# CXXFLAGS += -Minfo=all   # very verbose output
-
-GPU_COMPILER_FLAGS += -O3 --std c++14 -arch=sm_70
-GPU_COMPILER_FLAGS += --expt-relaxed-constexpr
-
-# To turn on the OpenMP support, uncomment these 3 lines and comment the line just above defining 'SMILEI_OPENACC_MODE'
-# CXXFLAGS += -mp=gpu -DSMILEI_ACCELERATOR_GPU_OMP
-# GPU_COMPILER_FLAGS += -DSMILEI_ACCELERATOR_GPU_OMP # Can't we pass the -mp=gpu to nvcc when compiling a .cu file ?
-# LDFLAGS += -mp=gpu
-
-LDFLAGS += -ta=tesla:cc70 -std=c++14 -Mcudalib=curand -lcudart -lcurand -lacccuda -L/gpfslocalsys/cuda/11.2/lib64/
-CXXFLAGS += -D__GCC_ATOMIC_TEST_AND_SET_TRUEVAL=1
diff --git a/scripts/compile_tools/machine/ruche_gpu2 b/scripts/compile_tools/machine/ruche_gpu2
index afc9e7921..0a004f782 100644
--- a/scripts/compile_tools/machine/ruche_gpu2
+++ b/scripts/compile_tools/machine/ruche_gpu2
@@ -7,7 +7,7 @@
 
 # Compile command: make -j 40  machine="ruche_gpu2" config="gpu_nvidia noopenmp detailed_timers verbose"
 
-SMILEICXX.DEPS = nvcc -I/gpfs/softs/spack_0.17/opt/spack/linux-centos7-haswell/nvhpc-23.7/openmpi-4.1.5-ckfuippq6gf6qsilwitd7d2zyd5bng32/include/ -I/gpfs/softs/spack_0.17/opt/spack/linux-centos7-haswell/nvhpc-23.7/hdf5-1.12.0-3em63nl4p5tmv37offfmuvz2uswvgwzv/include/
+SMILEICXX.DEPS = g++ #nvcc -I/gpfs/softs/spack_0.17/opt/spack/linux-centos7-haswell/nvhpc-23.7/openmpi-4.1.5-ckfuippq6gf6qsilwitd7d2zyd5bng32/include/ -I/gpfs/softs/spack_0.17/opt/spack/linux-centos7-haswell/nvhpc-23.7/hdf5-1.12.0-3em63nl4p5tmv37offfmuvz2uswvgwzv/include/
 
 GPU_COMPILER = nvcc -I/gpfs/softs/spack_0.17/opt/spack/linux-centos7-haswell/nvhpc-23.7/openmpi-4.1.5-ckfuippq6gf6qsilwitd7d2zyd5bng32/include/ -I/gpfs/softs/spack_0.17/opt/spack/linux-centos7-haswell/nvhpc-23.7/hdf5-1.12.0-3em63nl4p5tmv37offfmuvz2uswvgwzv/include/
 

From dc644af8c965e736cea30690c9e5071f0785bc26 Mon Sep 17 00:00:00 2001
From: cprouveur <cprouveur@login6.head.adastra.cines.fr>
Date: Thu, 1 Feb 2024 23:46:46 +0100
Subject: [PATCH 3/6] fix to makefile changes for adastra / amd gpus

---
 makefile                         | 4 ++--
 src/Particles/nvidiaParticles.cu | 4 ++++
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/makefile b/makefile
index 8ab1c925e..4a9271d14 100755
--- a/makefile
+++ b/makefile
@@ -210,8 +210,8 @@ endif
 # AMD GPUs
 ifneq (,$(call parse_config,gpu_amd))
 	CXXFLAGS += -DSMILEI_ACCELERATOR_MODE
-	GPU_COMPILER_FLAGS = -x hip -DSMILEI_ACCELERATOR_MODE $(DIRS:%=-I%) $(PY_FLAGS)
-	
+	GPU_COMPILER_FLAGS += -x hip -DSMILEI_ACCELERATOR_MODE -std=c++14 $(DIRS:%=-I%) #$(PY_FLAGS)
+	GPU_COMPILER_FLAGS += -I$(BUILD_DIR)/src/Python $(PY_CXXFLAGS)
 	GPU_KERNEL_SRCS := $(shell find src/* -name \*.cu)
 	GPU_KERNEL_OBJS := $(addprefix $(BUILD_DIR)/, $(GPU_KERNEL_SRCS:.cu=.o))
 	
diff --git a/src/Particles/nvidiaParticles.cu b/src/Particles/nvidiaParticles.cu
index c0d66c7de..d7a63f0b3 100644
--- a/src/Particles/nvidiaParticles.cu
+++ b/src/Particles/nvidiaParticles.cu
@@ -12,6 +12,10 @@
 #include <thrust/execution_policy.h>
 #include <thrust/iterator/zip_iterator.h>
 #include <thrust/tuple.h>
+#include <thrust/count.h>
+#include <thrust/remove.h>
+#include <thrust/sort.h>
+
 
 #include "Patch.h"
 #include "gpu.h"

From a801484a9576510b527368297a38107e4dd6c6e9 Mon Sep 17 00:00:00 2001
From: "charles.prouveur@cea.fr" <charles.prouveur@cea.fr>
Date: Fri, 2 Feb 2024 00:00:24 +0100
Subject: [PATCH 4/6] added deps flags in make file in order to avoid conflicts
 due to gpu flags now in CXXFLAGS; updated jean_zay_gpu_V100 and
 jean_zay_gpu_A100

---
 makefile                                        | 17 +++++++++++------
 scripts/compile_tools/machine/jean_zay_gpu_A100 | 13 +++----------
 scripts/compile_tools/machine/jean_zay_gpu_V100 | 17 +++++------------
 3 files changed, 19 insertions(+), 28 deletions(-)

diff --git a/makefile b/makefile
index 8ab1c925e..91e19f7a4 100755
--- a/makefile
+++ b/makefile
@@ -103,6 +103,7 @@ endif
 ifneq ($(strip $(HDF5_ROOT_DIR)),)
 CXXFLAGS += -I$(HDF5_ROOT_DIR)/include
 LDFLAGS := -L$(HDF5_ROOT_DIR)/lib  $(LDFLAGS)
+DEPSFLAGS += -I$(HDF5_ROOT_DIR)/include
 endif
 # Boost library
 ifneq ($(strip $(BOOST_ROOT_DIR)),)
@@ -112,8 +113,11 @@ endif
 LDFLAGS += -lhdf5
 # Include subdirs
 CXXFLAGS += $(DIRS:%=-I%)
+DEPSFLAGS += $(DIRS:%=-I%)
 # Python-related flags
 CXXFLAGS += -I$(BUILD_DIR)/src/Python
+DEPSFLAGS += -I$(BUILD_DIR)/src/Python
+
 PYSCRIPTS = $(shell find src/Python -name \*.py)
 PYHEADERS := $(addprefix $(BUILD_DIR)/, $(PYSCRIPTS:.py=.pyh))
 PY_CXXFLAGS := $(shell $(PYTHONCONFIG) --includes)
@@ -199,8 +203,9 @@ ifneq (,$(call parse_config,gpu_nvidia))
 	
 	CXXFLAGS += -DSMILEI_ACCELERATOR_MODE -DSMILEI_OPENACC_MODE
 	GPU_COMPILER = nvcc
-	GPU_COMPILER_FLAGS += -x cu -DSMILEI_ACCELERATOR_MODE -DSMILEI_OPENACC_MODE $(DIRS:%=-I%) $(PY_FLAGS)
-	
+	GPU_COMPILER_FLAGS += -x cu -DSMILEI_ACCELERATOR_MODE -DSMILEI_OPENACC_MODE
+	GPU_COMPILER_FLAGS += -I$(BUILD_DIR)/src/Python $(PY_CXXFLAGS)
+
 	GPU_KERNEL_SRCS := $(shell find src/* -name \*.cu)
 	GPU_KERNEL_OBJS := $(addprefix $(BUILD_DIR)/, $(GPU_KERNEL_SRCS:.cu=.o))
 	
@@ -211,7 +216,7 @@ endif
 ifneq (,$(call parse_config,gpu_amd))
 	CXXFLAGS += -DSMILEI_ACCELERATOR_MODE
 	GPU_COMPILER_FLAGS = -x hip -DSMILEI_ACCELERATOR_MODE $(DIRS:%=-I%) $(PY_FLAGS)
-	
+	GPU_COMPILER_FLAGS += -I$(BUILD_DIR)/src/Python $(PY_CXXFLAGS)	
 	GPU_KERNEL_SRCS := $(shell find src/* -name \*.cu)
 	GPU_KERNEL_OBJS := $(addprefix $(BUILD_DIR)/, $(GPU_KERNEL_SRCS:.cu=.o))
 	
@@ -336,13 +341,13 @@ $(BUILD_DIR)/%.pyh: %.py
 $(BUILD_DIR)/%.d: %.cpp
 	@echo "Checking dependencies for $<"
 	$(Q) if [ ! -d "$(@D)" ]; then mkdir -p "$(@D)"; fi;
-	$(Q) $(SMILEICXX.DEPS) $(CXXFLAGS) -MF"$@" -MM -MP -MT"$@ $(@:.d=.o)" $<
+	$(Q) $(SMILEICXX.DEPS) $(DEPSFLAGS) -MF"$@" -MM -MP -MT"$@ $(@:.d=.o)" $<
 
 # Calculate dependencies: special for Params.cpp which needs pyh files
 $(BUILD_DIR)/src/Params/Params.d: src/Params/Params.cpp $(PYHEADERS)
 	@echo "Checking dependencies for $<"
 	$(Q) if [ ! -d "$(@D)" ]; then mkdir -p "$(@D)"; fi;
-	$(Q) $(SMILEICXX.DEPS) $(CXXFLAGS) -MF"$@" -MM -MP -MT"$@ $(@:.d=.o)" $<
+	$(Q) $(SMILEICXX.DEPS) $(DEPSFLAGS) -MF"$@" -MM -MP -MT"$@ $(@:.d=.o)" $<
 
 ifeq ($(findstring icpc, $(COMPILER_INFO)), icpc)
 $(BUILD_DIR)/src/Diagnostic/DiagnosticScalar.o : src/Diagnostic/DiagnosticScalar.cpp
@@ -466,7 +471,7 @@ tables_clean:
 $(TABLES_BUILD_DIR)/%.d: %.cpp
 	@echo "Checking dependencies for $<"
 	$(Q) if [ ! -d "$(@D)" ]; then mkdir -p "$(@D)"; fi;
-	$(Q) $(SMILEICXX) $(CXXFLAGS) -MF"$@" -MM -MP -MT"$@ $(@:.d=.o)" $<
+	$(Q) $(SMILEICXX) $(DEPSFLAGS) -MF"$@" -MM -MP -MT"$@ $(@:.d=.o)" $<
 
 # Compile cpps
 $(TABLES_BUILD_DIR)/%.o : $(TABLES_DIR)/%.cpp
diff --git a/scripts/compile_tools/machine/jean_zay_gpu_A100 b/scripts/compile_tools/machine/jean_zay_gpu_A100
index 268b31acd..92b3f2fb7 100644
--- a/scripts/compile_tools/machine/jean_zay_gpu_A100
+++ b/scripts/compile_tools/machine/jean_zay_gpu_A100
@@ -7,22 +7,15 @@
 #
 
 SMILEICXX.DEPS = g++
-GPU_COMPILER = nvcc
+#GPU_COMPILER = nvcc
 
 CXXFLAGS += -w
-# IDRIS config for curand
 CXXFLAGS += -tp=zen3 -ta=tesla:cc80 -std=c++14  -lcurand -Mcudalib=curand
-# CXXFLAGS += --expt-relaxed-constexpr
 CXXFLAGS += -Minfo=accel # what is offloaded/copied
 # CXXFLAGS += -Minfo=all   # very verbose output
+CXXFLAGS += -D__GCC_ATOMIC_TEST_AND_SET_TRUEVAL=1 
 
 GPU_COMPILER_FLAGS += -O3 --std c++14 -arch=sm_80
 GPU_COMPILER_FLAGS += --expt-relaxed-constexpr
 
-# To turn on the OpenMP support, uncomment these 3 lines and comment the line just above defining 'SMILEI_OPENACC_MODE'
-# CXXFLAGS += -mp=gpu -DSMILEI_ACCELERATOR_GPU_OMP
-# GPU_COMPILER_FLAGS += -DSMILEI_ACCELERATOR_GPU_OMP # Can't we pass the -mp=gpu to nvcc when compiling a .cu file ?
-# LDFLAGS += -mp=gpu
-
-LDFLAGS += -ta=tesla:cc80 -std=c++14 -Mcudalib=curand -lcudart -lcurand -lacccuda -L/gpfslocalsys/cuda/11.2/lib64/
-CXXFLAGS += -D__GCC_ATOMIC_TEST_AND_SET_TRUEVAL=1
+LDFLAGS += -ta=tesla:cc80 -std=c++14 -Mcudalib=curand -lcudart -lcurand -lacccuda #-L/gpfslocalsys/cuda/11.2/lib64/
diff --git a/scripts/compile_tools/machine/jean_zay_gpu_V100 b/scripts/compile_tools/machine/jean_zay_gpu_V100
index d088ae26f..91d68a4ff 100644
--- a/scripts/compile_tools/machine/jean_zay_gpu_V100
+++ b/scripts/compile_tools/machine/jean_zay_gpu_V100
@@ -7,22 +7,15 @@
 #
 
 SMILEICXX.DEPS = g++
-GPU_COMPILER = nvcc
+#GPU_COMPILER = nvcc
 
 CXXFLAGS += -w
-# IDRIS config for curand
-CXXFLAGS += -ta=tesla:cc70 -std=c++14 -lcurand
-# CXXFLAGS += --expt-relaxed-constexpr
-CXXFLAGS += -Minfo=accel # what is offloaded/copied
+CXXFLAGS += -ta=tesla:cc70 -std=c++14 -lcurand -Minfo=accel # what is offloaded/copied
 # CXXFLAGS += -Minfo=all   # very verbose output
+CXXFLAGS += -D__GCC_ATOMIC_TEST_AND_SET_TRUEVAL=1
 
-GPU_COMPILER_FLAGS += -O3 --std c++14 -arch=sm_70
-GPU_COMPILER_FLAGS += --expt-relaxed-constexpr
 
-# To turn on the OpenMP support, uncomment these 3 lines and comment the line just above defining 'SMILEI_OPENACC_MODE'
-# CXXFLAGS += -mp=gpu -DSMILEI_ACCELERATOR_GPU_OMP
-# GPU_COMPILER_FLAGS += -DSMILEI_ACCELERATOR_GPU_OMP # Can't we pass the -mp=gpu to nvcc when compiling a .cu file ?
-# LDFLAGS += -mp=gpu
+GPU_COMPILER_FLAGS += -O3 --std c++14 -arch=sm_70 
+GPU_COMPILER_FLAGS += --expt-relaxed-constexpr
 
 LDFLAGS += -ta=tesla:cc70 -std=c++14 -Mcudalib=curand -lcudart -lcurand -lacccuda
-CXXFLAGS += -D__GCC_ATOMIC_TEST_AND_SET_TRUEVAL=1

From 38c3a9c5d397b21a91a68976472bd121d766aa0b Mon Sep 17 00:00:00 2001
From: Frederic Perez <frederic.perez@polytechnique.edu>
Date: Wed, 28 Feb 2024 16:27:17 +0100
Subject: [PATCH 5/6] gpu makefile

---
 doc/Sphinx/Overview/releases.rst | 6 +++++-
 makefile                         | 5 ++---
 smilei.sh                        | 2 +-
 3 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/doc/Sphinx/Overview/releases.rst b/doc/Sphinx/Overview/releases.rst
index 13376c0c2..ea10c1179 100755
--- a/doc/Sphinx/Overview/releases.rst
+++ b/doc/Sphinx/Overview/releases.rst
@@ -23,10 +23,14 @@ You can find older, `unsupported versions here <https://github.com/SmileiPIC/Smi
 Changes made in the repository (not released)
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
+* GPU:
+
+  * Compilation simplified and better documented.
+
 * Happi:
 
   * In ``Scalar``, it is now possible to make an operation on scalars such as ``"Uelm+Ukin"``.
-    The list of available scalars can be obtained from ``getScalars()``.
+  * The list of available scalars can be obtained from ``getScalars()``.
   * New arguments ``xoffset`` and ``yoffset`` to shift plot coordinates.
   * New argument ``timestep_indices`` as an alternative to ``timesteps``.
   * Changed coordinate reference for 2D probe in 3D or AM geometry
diff --git a/makefile b/makefile
index 300577f5d..194443f12 100755
--- a/makefile
+++ b/makefile
@@ -18,7 +18,6 @@ SMILEICXX ?= mpicxx
 PYTHONEXE ?= python
 HDF5_ROOT_DIR ?= $(HDF5_ROOT)
 BOOST_ROOT_DIR ?= $(BOOST_ROOT)
-GPU_COMPILER ?= $(CC)
 TABLES_BUILD_DIR ?= tools/tables/build
 
 #-----------------------------------------------------
@@ -202,10 +201,9 @@ ifneq (,$(call parse_config,gpu_nvidia))
 	override config += noopenmp # Prevent openmp for nvidia
 	
 	CXXFLAGS += -DSMILEI_ACCELERATOR_MODE -DSMILEI_OPENACC_MODE
-	GPU_COMPILER = nvcc
+	GPU_COMPILER ?= nvcc
 	GPU_COMPILER_FLAGS += -x cu -DSMILEI_ACCELERATOR_MODE -DSMILEI_OPENACC_MODE
 	GPU_COMPILER_FLAGS += -I$(BUILD_DIR)/src/Python $(PY_CXXFLAGS)
-
 	GPU_KERNEL_SRCS := $(shell find src/* -name \*.cu)
 	GPU_KERNEL_OBJS := $(addprefix $(BUILD_DIR)/, $(GPU_KERNEL_SRCS:.cu=.o))
 	
@@ -215,6 +213,7 @@ endif
 # AMD GPUs
 ifneq (,$(call parse_config,gpu_amd))
 	CXXFLAGS += -DSMILEI_ACCELERATOR_MODE
+	GPU_COMPILER ?= $(CC)
 	GPU_COMPILER_FLAGS += -x hip -DSMILEI_ACCELERATOR_MODE -std=c++14 $(DIRS:%=-I%) #$(PY_FLAGS)
 	GPU_COMPILER_FLAGS += -I$(BUILD_DIR)/src/Python $(PY_CXXFLAGS)
 	GPU_KERNEL_SRCS := $(shell find src/* -name \*.cu)
diff --git a/smilei.sh b/smilei.sh
index 58c400e15..393b76664 100755
--- a/smilei.sh
+++ b/smilei.sh
@@ -97,6 +97,6 @@ for namelist in "${namelist_files[@]}"; do
     cp $namelist $outdir
 done
 cd $outdir
-$mpiexe -np $proc $smilei "${namelists[@]}"
+$mpiexe --map-by ppr:$proc:socket:pe=${OMP_NUM_THREADS:-1} -np $proc $smilei "${namelists[@]}"
 cd $H
 

From 2a8c0fb45e6362fee631a2dd313b31e869d8590b Mon Sep 17 00:00:00 2001
From: Frederic Perez <frederic.perez@polytechnique.edu>
Date: Thu, 29 Feb 2024 11:54:26 +0100
Subject: [PATCH 6/6] makefile

---
 makefile | 1 -
 1 file changed, 1 deletion(-)

diff --git a/makefile b/makefile
index 194443f12..61224c25a 100755
--- a/makefile
+++ b/makefile
@@ -225,7 +225,6 @@ endif
 #activate openmp unless noopenmp flag
 # For Fujitsu compiler: -Kopenmp
 ifeq (,$(call parse_config,noopenmp))
-$(info OpenMP activated)
 	ifeq ($(findstring FCC, $(COMPILER_INFO)), FCC)
 		OPENMP_FLAG ?= -Kopenmp -Kopenmp_simd
 	else ifeq ($(findstring FCCpx, $(COMPILER_INFO)), FCCpx)