diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 6faa6ff17..e2efed6dc 100755 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -8,14 +8,14 @@ stages: - compile_debug - compile_no_mpi_threadmultiple - compile_no_openmp - - compile_omptasks - - run_omptasks +# - compile_omptasks +# - run_omptasks install: stage: install only: - develop - + script: # Force workdir cleaning in case of retried - echo "CI_PIPELINE_ID = " $CI_PIPELINE_ID @@ -33,7 +33,7 @@ compile_default: stage: compile_default only: - develop - + script: # Move in test dir - cd /sps3/gitlab-runner/$CI_PIPELINE_ID/smilei/validation @@ -44,7 +44,7 @@ runQuick: stage: run_quick only: - develop - + script: # Move in test dir - cd /sps3/gitlab-runner/$CI_PIPELINE_ID/smilei/validation @@ -55,7 +55,7 @@ run1D: stage: run_default only: - develop - + script: # Move in test dir - cd /sps3/gitlab-runner/$CI_PIPELINE_ID/smilei/validation @@ -67,7 +67,7 @@ run2D: stage: run_default only: - develop - + script: # Move in test dir - cd /sps3/gitlab-runner/$CI_PIPELINE_ID/smilei/validation @@ -81,7 +81,7 @@ run3D: stage: run_default only: - develop - + script: # Move in test dir - cd /sps3/gitlab-runner/$CI_PIPELINE_ID/smilei/validation @@ -96,7 +96,7 @@ runAM: stage: run_default only: - develop - + script: # Move in test dir - cd /sps3/gitlab-runner/$CI_PIPELINE_ID/smilei/validation @@ -108,7 +108,7 @@ runCollisions: stage: run_default only: - develop - + script: # Move in test dir - cd /sps3/gitlab-runner/$CI_PIPELINE_ID/smilei/validation @@ -164,21 +164,21 @@ compile_no_openmp: - make clean - python validation/validation.py -k noopenmp -c -v -compile_omptasks: - stage: compile_omptasks - only: - - develop - - script: - - cd /sps3/gitlab-runner/$CI_PIPELINE_ID/smilei - - make clean - - python validation/validation.py -k omptasks -c -v - -run_omptasks: - stage: run_omptasks - only: - - develop - - script: - - cd /sps3/gitlab-runner/$CI_PIPELINE_ID/smilei/validation - - python validation.py -k omptasks -b "tst2d_tasks_01_radiation_pressure_acc.py" -m 4 -o 4 -n 1 -v +#compile_omptasks: +# stage: compile_omptasks +# only: +# - develop +# +# script: +# - cd /sps3/gitlab-runner/$CI_PIPELINE_ID/smilei +# - make clean +# - python validation/validation.py -k omptasks -c -v +# +#run_omptasks: +# stage: run_omptasks +# only: +# - develop +# +# script: +# - cd /sps3/gitlab-runner/$CI_PIPELINE_ID/smilei/validation +# - python validation.py -k omptasks -b "tst2d_tasks_01_radiation_pressure_acc.py" -m 4 -o 4 -n 1 -v diff --git a/benchmarks/gpu/tst3d_gpu_o2_thermal_plasma_medium.py b/benchmarks/gpu/tst3d_gpu_o2_thermal_plasma_medium.py index a50614236..cb8c8f26a 100644 --- a/benchmarks/gpu/tst3d_gpu_o2_thermal_plasma_medium.py +++ b/benchmarks/gpu/tst3d_gpu_o2_thermal_plasma_medium.py @@ -66,7 +66,7 @@ def InitialChargeDensity(x, y, z): number_of_patches = kPatchPerGridDimension, EM_boundary_conditions = [ ["periodic"] ], print_every = 10, - random_seed = smilei_mpi_rank) + ) Vectorization(mode = "off") diff --git a/benchmarks/gpu/tst3d_gpu_o2_thermal_plasma_short.py b/benchmarks/gpu/tst3d_gpu_o2_thermal_plasma_short.py index 548746977..a627232f9 100644 --- a/benchmarks/gpu/tst3d_gpu_o2_thermal_plasma_short.py +++ b/benchmarks/gpu/tst3d_gpu_o2_thermal_plasma_short.py @@ -61,7 +61,6 @@ gpu_computing = True, # random_seed = 0xDEADBEEF, - random_seed = smilei_mpi_rank, ) Vectorization( diff --git a/benchmarks/gpu/tst3d_v_o2_thermal_plasma_medium.py b/benchmarks/gpu/tst3d_v_o2_thermal_plasma_medium.py index 524f564d0..92c598c19 100644 --- 
a/benchmarks/gpu/tst3d_v_o2_thermal_plasma_medium.py +++ b/benchmarks/gpu/tst3d_v_o2_thermal_plasma_medium.py @@ -67,7 +67,7 @@ def InitialChargeDensity(x, y, z): number_of_patches = kPatchPerGridDimension, EM_boundary_conditions = [ ["periodic"] ], print_every = 10, - random_seed = smilei_mpi_rank) + ) Vectorization(mode = "on") diff --git a/benchmarks/gpu/tst3d_v_o2_thermal_plasma_short.py b/benchmarks/gpu/tst3d_v_o2_thermal_plasma_short.py index 3672fd9d0..bc553aa62 100644 --- a/benchmarks/gpu/tst3d_v_o2_thermal_plasma_short.py +++ b/benchmarks/gpu/tst3d_v_o2_thermal_plasma_short.py @@ -61,7 +61,6 @@ gpu_computing = False, # random_seed = 0xDEADBEEF, - random_seed = smilei_mpi_rank, ) Vectorization( diff --git a/benchmarks/tst1d_24_cir_plane_wave_BTIS3.py b/benchmarks/tst1d_24_cir_plane_wave_BTIS3.py deleted file mode 100755 index cab778662..000000000 --- a/benchmarks/tst1d_24_cir_plane_wave_BTIS3.py +++ /dev/null @@ -1,95 +0,0 @@ -# _____________________________________________________________________________ -# -# Electron trajectory in a plane wave -# with a Gaussian temporal profile. -# -# Validation in the relativist regime -# -# _____________________________________________________________________________ - -import math - -# _____________________________________________________________________________ -# Main parameters - -l0 = 2.0*math.pi # laser wavelength -t0 = l0 # optical cicle -Lx = 80*l0 - -n0 = 1e-8 # particle density - -Tsim = 150.*t0 # duration of the simulation -resx = 64. # nb of cells in one laser wavelength - -dx = l0/resx # space step -dt = 0.95 * dx # timestep (0.95 x CFL) - -a0 = 5 -start = 0 # Laser start -fwhm = 10*t0 # Gaussian time fwhm -duration = 90*t0 # Laser duration -center = duration*0.5 # Laser profile center - -pusher = "borisBTIS3" - -# Density profile for inital location of the particles -def n0_(x): - if (dx`_). -As of April 2024, 181 papers have been published covering a broad range of topics: +As of May 2024, at least 192 papers have been published covering a broad range of topics: * laser-plasma interaction (LPI) / inertial fusion (FCI) * ultra-high intensity (UHI) applications @@ -50,13 +50,74 @@ Following is the distribution of these topics in the listed publications up to N Use the python script doc/doi2publications.py to generate entries from a DOI number, and paste them here You can count the number of papers in the list with the vim command :%s/.. \[//gn. + +.. [Sikorski2024] + + P. Sikorski, A. G. R. Thomas, S. S. Bulanov, M. Zepf and D. Seipt, + `Novel signatures of radiation reaction in electron–laser sidescattering`, + `New Journal of Physics 26 063011 (2024) `_ + +.. [Ivanov2024b] + + K. A. Ivanov, S. A. Shulyapov, D. A. Gorlova, I. P. Tsygvintsev, M. S. Krivokorytov, I. N. Tsymbalov, R. V. Volkov and A. B. Savelev, + `Laser-accelerated MeV-scale collimated electron bunch from a near-critical plasma of a liquid jet target`, + `Laser Physics Letters 21, 7 (2024) `_ + +.. [Malik2024] + + H. K. Malik, S. Kumar, and D. K. Singh, + `Effect of trapezoidal plasma density region in bubble wakefield acceleration`, + `Physica Scripta 99, 075601 (2024) `_ + +.. [Krafft2024b] + + C. Krafft, P. Savoini, and F. J. Polanco-Rodríguez, + `Mechanisms of Fundamental Electromagnetic Wave Radiation in the Solar Wind`, + `The Astrophysical Journal Letters 967, 2 (2024) `_ + +.. [Salgado2024] + + F. C. Salgado, A. Kozan, D. Seipt, D. Hollatz, P. Hilz, M. Kaluza, A. Sävert, A. Seidel, D. Ullmann, Y. Zhao, and M. 
Zepf, + `All-optical source size and emittance measurements of laser-accelerated electron beams`, + `Physical Review Accelerators and Beams 27, 052803 (2024) `_ + +.. [Ivanov2024a] + + K. A. Ivanov, D. A. Gorlova, I. N. Tsymbalov, I. P. Tsygvintsev, S. A. Shulyapov, R. V. Volkov, and A. B. Savel’ev, + `Laser-driven pointed acceleration of electrons with preformed plasma lens`, + `Physical Review Accelerators and Beams 27, 051301 (2024) `_ + +.. [Timmis2024] + + R. J. L. Timmis, R. W. Paddock, I. Ouatu, J. Lee, S. Howard, E. Atonga, R. T. Ruskov, H. Martin, R. H. W. Wang, R. Aboushelbaya, M. W. von der Leyen, E. Gumbrell and P. A. Norreys, + `Attosecond and nano‐Coulomb electron bunches via the Zero Vector Potential mechanism`, + `Scientific Reports volume 14, 10805 (2024) `_ + +.. [Azamoum2024] + + Y. Azamoum, G. A. Becker, S. Keppler, G. Duchateau, S. Skupin, M. Grech, F. Catoire, S. Hell, I. Tamer, M. Hornung, M. Hellwing, A. Kessler, F. Schorcht, and M. C. Kaluza, + `Optical probing of ultrafast laser-induced solid-to-overdense-plasma transitions`, + `Light: Science & Applications 13, 109 (2024) `_ + +.. [Pan2024] + + Z. Pan, J. Liu, P. Wang, Z. Mei, Z. Cao, D. Kong, S. Xu, Z. Liu, Y. Liang, Z. Peng, T. Xu, T. Song, X. Chen, Q. Wu, Y. Zhang, Q. Han, H. Chen, J. Zhao, Y. Gao, S. Chen, Y. Zhao, X. Yan, Y. Shou, W. Ma, + `Electron acceleration and x-ray generation from near-critical-density carbon nanotube foams driven by moderately relativistic lasers`, + `Physics of Plasmas 31, 043108 (2024) `_ + +.. [Yao2024] + + W. Yao, M. Nakatsutsumi, S. Buffechoux, P. Antici, M. Borghesi, A. Ciardi, S. N. Chen, E. d’Humières, L. Gremillet, R. Heathcote, V. Horný, P. McKenna, M. N. Quinn, L. Romagnani, R. Royle, G. Sarri, Y. Sentoku, H.-P. Schlenvoigt, T. Toncian, O. Tresca, L. Vassura, O. Willi, J. Fuchs, + `Optimizing laser coupling, matter heating, and particle acceleration from solids using multiplexed ultraintense lasers`, + `Matter and Radiation at Extremes 9, 047202 (2024) `_ + .. [Luo2024] M. Luo, C. Riconda, I. Pusztai, A. Grassi, J. S. Wurtele, and T. Fülöp, `Control of autoresonant plasma beat-wave wakefield excitation`, - `Phys. Rev. Research 6, 013338 (2024) `_ + `Physical Review Research 6, 013338 (2024) `_ -.. [Krafft2024] +.. [Krafft2024a] C. Krafft and P. Savoini, `Electrostatic Wave Decay in the Randomly Inhomogeneous Solar Wind`, @@ -127,7 +188,13 @@ Following is the distribution of these topics in the listed publications up to N A. Seidel, B. Lei, C. Zepter, M. C. Kaluza, A. Sävert, M. Zepf, and D. Seipt, `Polarization and CEP dependence of the transverse phase space in laser driven accelerators`, `Physical Review Research 6, 013056 (2024) `_ - + +.. [Krishnamurthy2023] + + S. Krishnamurthy, S. Chintalwad, A. P. L. Robinson, R. M. G. M. Trines, and B. Ramakrishna, + `Observation of proton modulations in laser–solid interaction`, + `Plasma Physics and Controlled Fusion 65 085020 (2023) `_ + .. [Gao2023b] X. Gao, @@ -216,7 +283,7 @@ Following is the distribution of these topics in the listed publications up to N E. Starodubtseva, I. Tsymbalov, D. Gorlova, K. Ivanov, and A. Savel'ev, `Low energy electron injection for direct laser acceleration`, - `Phys. Plasmas 30, 083105 (2023) `_ + `Physics of Plasmas 30, 083105 (2023) `_ .. [Maffini2023] @@ -228,7 +295,7 @@ Following is the distribution of these topics in the listed publications up to N S. Yu. Gus'kov, Ph. Korneev, and M. Murakami, `Laser-driven electrodynamic implosion of fast ions in a thin shell`, - `Matter Radiat. 
Extremes 8, 056602 (2023) `_ + `Matter and Radiation at Extremes 8, 056602 (2023) `_ .. [RezaeiPandari2023] @@ -240,19 +307,19 @@ Following is the distribution of these topics in the listed publications up to N J. Jonnerby, A. von Boetticher, J. Holloway, L. Corner, A. Picksley, A. J. Ross, R. J. Shalloo , C. Thornton, N. Bourgeois, R. Walczak, and S. M. Hooker, `Measurement of the decay of laser-driven linear plasma wakefields`, - `Phys. Rev. E 108, 055211 (2023) `_ + `Physical Review E 108, 055211 (2023) `_ .. [Drobniak2023] P. Drobniak, E. Baynard, C. Bruni, K. Cassou, C. Guyot, G. Kane, S. Kazamias, V. Kubytskyi, N. Lericheux, B. Lucas, M. Pittman, F. Massimo, A. Beck, A. Specka, P. Nghiem, and D. Minenna, `Random scan optimization of a laser-plasma electron injector based on fast particle-in-cell simulations`, - `Phys. Rev. Accel. Beams 26, 091302 (2023) `_ + `Physical Review Accelerators and Beams 26, 091302 (2023) `_ .. [Bukharskii2023] N. Bukharskii and Ph. Korneev, `Intense widely controlled terahertz radiation from laser-driven wires`, - `Matter Radiat. Extremes 8, 044401 (2023) `_ + `Matter and Radiation at Extremes 8, 044401 (2023) `_ .. [Schmitz2023] @@ -276,7 +343,7 @@ Following is the distribution of these topics in the listed publications up to N X. Gao, `Ionization dynamics of sub-micrometer-sized clusters in intense ultrafast laser pulses`, - `Phys. Plasmas 30, 052102 (2023) `_ + `Physics of Plasmas 30, 052102 (2023) `_ .. [Krafft2023] @@ -294,7 +361,7 @@ Following is the distribution of these topics in the listed publications up to N A. Ghizzo, D. Del Sarto, and H. Betar, `Collisionless Heating Driven by Vlasov Filamentation in a Counterstreaming Beams Configuration`, - `Phys. Rev. Lett. 131, 035101 (2023) `_ + `Physical Review Letters 131, 035101 (2023) `_ .. [Yang2023] @@ -306,31 +373,31 @@ Following is the distribution of these topics in the listed publications up to N W. Yao, A. Fazzini, S.N. Chen, K. Burdonov, J. Béard, M. Borghesi, A. Ciardi, M. Miceli, S. Orlando, X. Ribeyre, E. d'Humières and J. Fuchs, `Investigating particle acceleration dynamics in interpenetrating magnetized collisionless super-critical shocks`, - `J. Plasma Phys. 89, 915890101 (2023) `_ + `Journal of Plasma Physics 89, 915890101 (2023) `_ .. [Pak2023] T. Pak, M. Rezaei-Pandari, S. B. Kim, G. Lee, D. H. Wi, C. I. Hojbota, M. Mirzaie, H. Kim, J. H. Sung, S. K. Lee, C. Kang and K.-Y. Kim, `Multi-millijoule terahertz emission from laser-wakefield-accelerated electrons`, - `Light Sci Appl 12, 37 (2023) `_ + `Light: Science and Applications 12, 37 (2023) `_ .. [Istokskaia2023] V. Istokskaia, M. Tosca, L. Giuffrida, J. Psikal, F. Grepl, V. Kantarelou, S. Stancek, S. Di Siena, A. Hadjikyriacou, A. McIlvenny, Y. Levy, J. Huynh, M. Cimrman, P. Pleskunov, D. Nikitin, A. Choukourov, F. Belloni, A. Picciotto, S. Kar, M. Borghesi, A. Lucianetti, T. Mocek and D. Margarone, `A multi-MeV alpha particle source via proton-boron fusion driven by a 10-GW tabletop laser`, - `Commun Phys 6, 27 (2023) `_ + `Communications Physics 6, 27 (2023) `_ .. [Yoon2023] Y. D. Yoon, D. E. Wendel and G. S. Yun, `Equilibrium selection via current sheet relaxation and guide field amplification`, - `Nat Commun 14, 139 (2023) `_ + `Nature Communications 14, 139 (2023) `_ .. [Galbiati2023] M. Galbiati, A. Formenti, M. Grech and M. Passoni, `Numerical investigation of non-linear inverse Compton scattering in double-layer targets`, - `Front. Phys. 
11, fphy.2023.1117543 (2023) `_ + `Frontiers in Physics 11, fphy.2023.1117543 (2023) `_ .. [Sakai2023] @@ -342,7 +409,7 @@ Following is the distribution of these topics in the listed publications up to N A. Golovanov, I. Yu. Kostyukov, A. Pukhov and V. Malka, `Energy-Conserving Theory of the Blowout Regime of Plasma Wakefield`, - `Phys. Rev. Lett. 130, 105001 (2023) `_ + `Physical Review Letters 130, 105001 (2023) `_ .. [Miethlinger2023] @@ -354,13 +421,13 @@ Following is the distribution of these topics in the listed publications up to N C. Zepter, A. Seidel, M. Zepf, M. C. Kaluza and A. Sävert, `Role of spatiotemporal couplings in stimulated Raman side scattering`, - `Phys. Rev. Research 5, L012023 (2023) `_ + `Physical Review Research 5, L012023 (2023) `_ .. [Marini2023] S. Marini, M. Grech, P. S. Kleij, M. Raynaud and C. Riconda, `Electron acceleration by laser plasma wedge interaction`, - `Phys. Rev. Research 5, 013115 (2023) `_ + `Physical Review Research 5, 013115 (2023) `_ .. [Blackman2022] @@ -420,7 +487,7 @@ Following is the distribution of these topics in the listed publications up to N D. Margarone, J. Bonvalet, L. Giuffrida, A. Morace, V. Kantarelou, M. Tosca, D. Raffestin, P. Nicolai, A. Picciotto, Y. Abe, Y. Arikawa, S. Fujioka, Y. Kuramitsu, H. Habara and D. Batani, `In-Target Proton–Boron Nuclear Fusion Using a PW-Class Laser`, - `Appl. Sci. 12(3), 1444 (2022) `_ + `Applied Sciences 12(3), 1444 (2022) `_ .. [Kochetkov2022] @@ -432,13 +499,13 @@ Following is the distribution of these topics in the listed publications up to N A. Oudin, A. Debayle, C. Ruyer, D. Benisti, `Cross-beam energy transfer between spatially smoothed laser beams`, - `Phys. Plasmas 29, 112112 (2022) `_ + `Physics of Plasmas 29, 112112 (2022) `_ .. [Chen2022] Q. Chen, D. Maslarova, J. Wang, S. Li, and D. Umstadter, `Injection of electron beams into two laser wakefields and generation of electron rings`, - `Phys. Rev. E 106, 055202 (2022) `_ + `Physical Review E 106, 055202 (2022) `_ .. [Kumar2022b] @@ -450,7 +517,7 @@ Following is the distribution of these topics in the listed publications up to N S. Kumar, D. K. Singh and H. K. Malik, `Comparative study of ultrashort single-pulse and multi-pulse driven laser wakefield acceleration`, - `Laser Phys. Lett. 20, 026001 (2022) `_ + `Laser Physics Letters 20, 026001 (2022) `_ .. [Miloshevsky2022] @@ -474,25 +541,25 @@ Following is the distribution of these topics in the listed publications up to N I. Ouatu, B. T. Spiers, R. Aboushelbaya, Q. Feng, M. W. von der Leyen, R. W. Paddock, R. Timmis, C. Ticos, K. M. Krushelnick and P. A. Norreys, `Ionization states for the multipetawatt laser-QED regime`, - `Phys. Rev. E 106, 015205 (2022) `_ + `Physical Review E 106, 015205 (2022) `_ .. [Beth2022] A. Beth, H. Gunell, C. Simon Wedlund, C. Goetz, H. Nilsson and M. Hamrin, `First investigation of the diamagnetic cavity boundary layer with a 1D3V PIC simulation`, - `A&A 667, A143 (2022) `_ + `Astronomy & Astrophysics 667, A143 (2022) `_ .. [Guo2022] Y. Guo, X. Geng, L. Ji, B. Shen and R. Li, `Improving the accuracy of hard photon emission by sigmoid sampling of the quantum-electrodynamic table in particle-in-cell Monte Carlo simulations`, - `Phys. Rev. E 105, 025309 (2022) `_ + `Physical Review E 105, 025309 (2022) `_ .. [Pae2022] - K. . Pae, C. M. Kim, V. B. Pathak, C.-M. Ryu and C. H. Nam, + K. H. Pae, C. M. Kim, V. B. Pathak, C.-M. Ryu and C. H.
Nam, `Direct laser acceleration of electrons from a plasma mirror by an intense few-cycle Laguerre–Gaussian laser and its dependence on the carrier-envelope phase`, - `Plasma Phys. Control. Fusion 64, 055013 (2022) `_ + `Plasma Physics and Controlled Fusion 64, 055013 (2022) `_ .. [Zhang2022a] @@ -505,43 +572,43 @@ Following is the distribution of these topics in the listed publications up to N Q. Han, X. Geng, B. Shen, Z. Xu and L. Ji, `Ultra-fast polarization of a thin electron layer in the rotational standing-wave field driven by double ultra-intense laser pulses`, - `New J. Phys. 24, 063013 (2022) `_ + `New Journal of Physics 24, 063013 (2022) `_ .. [Gothel2022] I. Göthel, C. Bernert, M. Bussmann, M. Garten, T. Miethlinger, M. Rehwald, K. Zeil, T. Ziegler, T. E. Cowan, U. Schramm and T. Kluge, `Optimized laser ion acceleration at the relativistic critical density surface`, - `Plasma Phys. Control. Fusion 64, 044010 (2022) `_ + `Plasma Physics and Controlled Fusion 64, 044010 (2022) `_ .. [Fazzini2022] A. Fazzini, W. Yao, K. Burdonov, J. Béard, S. N. Chen, A. Ciardi, E. d’Humières, R. Diab, E. D. Filippov, S. Kisyov, V. Lelasseux, M. Miceli, Q. Moreno, S. Orlando, S. Pikuz, X. Ribeyre, M. Starodubtsev, R. Zemskov and J. Fuchs, `Particle energization in colliding subcritical collisionless shocks investigated in the laboratory`, - `A&A 665, A87 (2022) `_ + `Astronomy & Astrophysics 665, A87 (2022) `_ .. [Bykov2022] A. M. Bykov, S. M. Osipov and V. I. Romanskii, `Acceleration of Cosmic Rays to Energies above 1015 eV by Transrelativistic Shocks`, - `J. Exp. Theor. Phys. 134, 487-497 (2022) `_ + `Journal of Experimental and Theoretical Physics 134, 487-497 (2022) `_ .. [Sundstrom2022] A. Sundström, M. Grech, I. Pusztai and C. Riconda, `Stimulated-Raman-scattering amplification of attosecond XUV pulses with pulse-train pumps and application to local in-depth plasma-density measurement`, - `Phys. Rev. E 106, 045208 (2022) `_ + `Physical Review E 106, 045208 (2022) `_ .. [Krafft2022b] C. Krafft and P. Savoini, `Third and Fourth Harmonics of Electromagnetic Emissions by a Weak Beam in a Solar Wind Plasma with Random Density Fluctuations`, - `ApJL 934, L28 (2022) `_ + `The Astrophysical Journal Letters 934, L28 (2022) `_ .. [Krafft2022a] C. Krafft and P. Savoini, `Fundamental Electromagnetic Emissions by a Weak Electron Beam in Solar Wind Plasmas with Density Fluctuations`, - `ApJL 924, L24 (2022) `_ + `The Astrophysical Journal Letters 924, L24 (2022) `_ .. [Kong2022] @@ -553,7 +620,7 @@ Following is the distribution of these topics in the listed publications up to N C. Davidson, Z.-M. Sheng, T. Wilson and P. McKenna, `Theoretical and computational studies of the Weibel instability in several beam–plasma interaction configurations`, - `J. Plasma Phys. 88, 905880206 (2022) `_ + `Journal of Plasma Physics 88, 905880206 (2022) `_ .. [Glek2022] @@ -565,7 +632,7 @@ Following is the distribution of these topics in the listed publications up to N D. Umstadter `Controlled Injection of Electrons for Improved Performance of Laser-Wakefield Acceleration`, - `United States: N. p., (2022) `_ + `United States Department of Energy Technical Report (2022) `_ .. [Massimo2022] @@ -584,7 +651,7 @@ Following is the distribution of these topics in the listed publications up to N P. K. Singh, F.-Y. Li, C.-K. Huang, A. Moreau, R. Hollinger, A. Junghans, A. Favalli, C. Calvi, S. Wang, Y. Wang, H. Song, J. J. Rocca, R. E. Reinovsky and S. 
Palaniyappan, `Vacuum laser acceleration of super-ponderomotive electrons using relativistic transparency injection`, - `Nat Commun 13, 54 (2022) `_ + `Nature Communications 13, 54 (2022) `_ .. [Lobet2022] @@ -615,13 +682,13 @@ Following is the distribution of these topics in the listed publications up to N P. Tomassini, F. Massimo, L. Labate and L. A. Gizzi, `Accurate electron beam phase-space theory for ionization-injection schemes driven by laser pulses`, - `High Pow Laser Sci Eng 10, e15 (2021) `_ + `High Power Laser Science and Engineering 10, e15 (2021) `_ .. [Meinhold2021] T. A. Meinhold and N. Kumar, `Radiation pressure acceleration of protons from structured thin-foil targets`, - `J. Plasma Phys. 87, 905870607 (2021) `_ + `Journal of Plasma Physics 87, 905870607 (2021) `_ .. [Bonvalet2021b] @@ -633,13 +700,13 @@ Following is the distribution of these topics in the listed publications up to N Y. Shi, D. R. Blackman and A. Arefiev, `Electron acceleration using twisted laser wavefronts`, - `Plasma Phys. Control. Fusion 63, 125032 (2021) `_ + `Plasma Physics and Controlled Fusion 63, 125032 (2021) `_ .. [Kumar2021] N. Kumar and B. Reville, `Nonthermal Particle Acceleration at Highly Oblique Nonrelativistic Shocks`, - `ApJL 921, L14 (2021) `_ + `The Astrophysical Journal Letters 921, L14 (2021) `_ .. [Ghaith2021] @@ -651,13 +718,13 @@ Following is the distribution of these topics in the listed publications up to N V. Horný and L. Veisz, `Generation of single attosecond relativistic electron bunch from intense laser interaction with a nanosphere`, - `Plasma Phys. Control. Fusion 63, 125025 (2021) `_ + `Plasma Physics and Controlled Fusion 63, 125025 (2021) `_ .. [Krafft2021] C. Krafft and P. Savoini, `Second Harmonic Electromagnetic Emissions by an Electron Beam in Solar Wind Plasmas with Density Fluctuations`, - `ApJL 917, L23 (2021) `_ + `The Astrophysical Journal Letters 917, L23 (2021) `_ .. [Khalilzadeh2021c] @@ -681,7 +748,7 @@ Following is the distribution of these topics in the listed publications up to N Y. Shou, D. Wang, P. Wang, J. Liu, Z. Cao, Z. Mei, S. Xu, Z. Pan, D. Kong, G. Qi, Z. Liu, Y. Liang, Z. Peng, Y. Gao, S. Chen, J. Zhao, Y. Zhao, H. Xu, J. Zhao, Y. Wu, X. Yan and W. Ma, `High-efficiency generation of narrowband soft x rays from carbon nanotube foams irradiated by relativistic femtosecond lasers`, - `Opt. Lett. 46, 3969 (2021) `_ + `Optics Letters 46, 3969 (2021) `_ .. [Khalilzadeh2021b] @@ -693,67 +760,67 @@ Following is the distribution of these topics in the listed publications up to N H. Hosseinkhani, M. Pishdast, J. Yazdanpanah and S. A. Ghasemi, `Investigation of the classical and quantum radiation reaction effect on interaction of ultra high power laser with near critical plasma`, - `J. Nuclear Sci. Technol. 42, 27-35 (2021) `_ + `Journal of Nuclear Science, Engineering and Technology 42, 27-35 (2021) `_ .. [MercuriBaron2021] A. Mercuri-Baron, M. Grech, F. Niel, A. Grassi, M. Lobet, A. Di Piazza and C. Riconda, `Impact of the laser spatio-temporal shape on Breit–Wheeler pair production`, - `New J. Phys. 23, 085006 (2021) `_ + `New Journal of Physics 23, 085006 (2021) `_ .. [Peng2021] H. Peng, C. Riconda, S. Weber, C.T. Zhou and S.C. Ruan, `Frequency Conversion of Lasers in a Dynamic Plasma Grating`, - `Phys. Rev. Applied 15, 054053 (2021) `_ + `Physical Review Applied 15, 054053 (2021) `_ .. [Shi2021a] Y. Shi, D. Blackman, D. Stutman and A. 
Arefiev, `Generation of Ultrarelativistic Monoenergetic Electron Bunches via a Synergistic Interaction of Longitudinal Electric and Magnetic Fields of a Twisted Laser`, - `Phys. Rev. Lett. 126, 234801 (2021) `_ + `Physical Review Letters 126, 234801 (2021) `_ .. [Bonvalet2021a] J. Bonvalet, Ph. Nicolaï, D. Raffestin, E. D'humieres, D. Batani, V. Tikhonchuk, V. Kantarelou, L. Giuffrida, M. Tosca, G. Korn, A. Picciotto, A. Morace, Y. Abe, Y. Arikawa, S. Fujioka, Y. Fukuda, Y. Kuramitsu, H. Habara and D. Margarone, `Energetic α-particle sources produced through proton-boron reactions by high-energy high-intensity laser beams`, - `Phys. Rev. E 103, 053202 (2021) `_ + `Physical Review E 103, 053202 (2021) `_ .. [Shekhanov2021] S. A. Shekhanov and V. T. Tikhonchuk, `SRS-SBS competition and nonlinear laser energy absorption in a high temperature plasma`, - `Plasma Phys. Control. Fusion 63, 115016 (2021) `_ + `Plasma Physics and Controlled Fusion 63, 115016 (2021) `_ .. [Psikal2021] - J Psikal, + J. Psikal, `Laser-driven ion acceleration from near-critical Gaussian plasma density profile`, - `Plasma Phys. Control. Fusion 63, 064002 (2021) `_ + `Plasma Physics and Controlled Fusion 63, 064002 (2021) `_ .. [Yoon2021b] Y. D. Yoon, G. S. Yun, D. E. Wendel and J. L. Burch, `Collisionless relaxation of a disequilibrated current sheet and implications for bifurcated structures`, - `Nat Commun 12, 3774 (2021) `_ + `Nature Communications 12, 3774 (2021) `_ .. [Lavorenti2021] F. Lavorenti, P. Henri, F. Califano, S. Aizawa and N. André, `Electron acceleration driven by the lower-hybrid-drift instability. An extended quasilinear model`, - `A&A 652, 202141049 (2021) `_ + `Astronomy & Astrophysics 652, 202141049 (2021) `_ .. [Golovanov2021] - A A Golovanov, I Yu Kostyukov, L Reichwein, J Thomas and A Pukhov, + A. A. Golovanov, I. Y. Kostyukov, L. Reichwein, J. Thomas and A. Pukhov, `Excitation of strongly nonlinear plasma wakefield by electron bunches`, - `Plasma Phys. Control. Fusion 63, 085004 (2021) `_ + `Plasma Physics and Controlled Fusion 63, 085004 (2021) `_ .. [Jirka2021] M. Jirka, P. Sasorov, S. S. Bulanov, G. Korn, B. Rus and S. V. Bulanov, `Reaching high laser intensity by a radiating electron`, - `Phys. Rev. A 103, 053114 (2021) `_ + `Physical Review A 103, 053114 (2021) `_ .. [Marques2021] @@ -783,7 +850,7 @@ Following is the distribution of these topics in the listed publications up to N G. Cantono, A. Permogorov, J. Ferri, E. Smetanina, A. Dmitriev, A. Persson, T. Fülöp and C.-G. Wahlström, `Laser-driven proton acceleration from ultrathin foils with nanoholes`, - `Sci Rep 11, 5006 (2021) `_ + `Scientific Reports 11, 5006 (2021) `_ .. [Perez2021] @@ -801,13 +868,13 @@ Following is the distribution of these topics in the listed publications up to N A. Sampath, X. Davoine, S. Corde, L. Gremillet, M. Gilljohann, M. Sangal, C. H. Keitel, R. Ariniello, J. Cary, H. Ekerfelt, C. Emma, F. Fiuza, H. Fujii, M. Hogan, C. Joshi, A. Knetsch, O. Kononenko, V. Lee, M. Litos, K. Marsh, Z. Nie, B. O’Shea, J. R. Peterson, P. San Miguel Claveria, D. Storey, Y. Wu, X. Xu, C. Zhang and M. Tamburini, `Extremely Dense Gamma-Ray Pulses in Electron Beam-Multifoil Collisions`, - `Phys. Rev. Lett. 126, 064801 (2021) `_ + `Physical Review Letters 126, 064801 (2021) `_ .. [Marini2021a] S. Marini, P. S. Kleij, F. Pisani, F. Amiranoff, M. Grech, A. Macchi, M. Raynaud and C. Riconda, `Ultrashort high energy electron bunches from tunable surface plasma waves driven with laser wavefront rotation`, - `Phys. Rev. 
E 103, L021201 (2021) `_ + `Physical Review E 103, L021201 (2021) `_ .. [Yao2021] @@ -819,14 +886,14 @@ Following is the distribution of these topics in the listed publications up to N E. G. Gelfer, A. M, Fedotov and S. Weber, `Radiation induced acceleration of ions in a laser irradiated transparent foil`, - `New J. Phys. 23, 095002 (2021) `_ + `New Journal of Physics 23, 095002 (2021) `_ `arXiv:1907.02621 `_ .. [Siminos2021] E. Siminos, I. Thiele and C. Olofsson, `Laser Wakefield Driven Generation of Isolated Carrier-Envelope-Phase Tunable Intense Subcycle Pulses`, - `Phys. Rev. Lett. 126, 044801 (2021) `_ + `Physical Review Letters 126, 044801 (2021) `_ `arXiv:1902.05014 `_ .. [Budriga2020] @@ -839,13 +906,13 @@ Following is the distribution of these topics in the listed publications up to N P. A. P. Nghiem, R. Assmann, A. Beck et al., `Toward a plasma-based accelerator at high beam energy with high beam charge and high beam quality`, - `Phys. Rev. Accel. Beams 23, 031301 (2020) `_ + `Physical Review Accelerators and Beams 23, 031301 (2020) `_ .. [Pisarczyk2020] T. Pisarczyk, M. Kalal, S. Yu. Gus'kov et al., `Hot electron retention in laser plasma created under terawatt subnanosecond irradiation of Cu targets`, - `Plasma Phys. Control. Fusion 62, 115020 (2020) `_ + `Plasma Physics and Controlled Fusion 62, 115020 (2020) `_ .. [Pagano2020] @@ -863,25 +930,25 @@ Following is the distribution of these topics in the listed publications up to N H. Peng, C. Riconda, M. Grech, C.-T. Zhou and S. Weber, `Dynamical aspects of plasma gratings driven by a static ponderomotive potential`, - `Plasma Phys. Control. Fusion 62, 115015 (2020) `_ + `Plasma Physics and Controlled Fusion 62, 115015 (2020) `_ .. [Glek2020] P. B. Glek, A. A. Voronin, V. Ya. Panchenko and A. M. Zheltikov, `Relativistic electron bunches locked to attosecond optical field waveforms: an attosecond light–matter bound state`, - `Laser Phys. Lett. 17 055401 (2020) `_ + `Laser Physics Letters 17 055401 (2020) `_ .. [Margarone2020] D. Margarone, A. Morace, J. Bonvalet et al., `Generation of α-Particle Beams With a Multi-kJ, Peta-Watt Class Laser System`, - `Front. Phys. 8, 343 (2020) `_ + `Frontiers in Physics 8, 343 (2020) `_ .. [Sinha2020] U. Sinha and N. Kumar, `Pair-beam propagation in a magnetized plasma for modeling the polarized radiation emission from gamma-ray bursts in laboratory astrophysics experiments`, - `Phys. Rev. E 101, 063204 (2020) `_ + `Physical Review E 101, 063204 (2020) `_ .. [Mitrofanov2020] @@ -893,81 +960,81 @@ Following is the distribution of these topics in the listed publications up to N B. T. Spiers, M. P. Hill, C. Brown, L. Ceurvorst, N. Ratan, A. F. Savin, P. Allan, E. Floyd, J. Fyrth, L. Hobbs, S. James, J. Luis, M. Ramsay, N. Sircombe, J. Skidmore, R. Aboushelbaya, M. W. Mayr, R. Paddock, R. H. W. Wang and P. A. Norreys, `Whole-beam self-focusing in fusion-relevant plasma`, - `Phil. Trans. R. Soc. A379, 20200159 `_ + `Philosophical Transactions of the Royal Society A379, 20200159 `_ .. [Derouillat2020] J. Derouillat and A. Beck, `Single Domain Multiple Decompositions for Particle-in-Cell simulations`, - `J. Phys.: Conf. Ser. 1596, 012052 (2020) `_ + `Journal of Physics: Conference Series 1596, 012052 (2020) `_ `arXiv:1912.04064 `_ .. [Zemzemi2020] I. Zemzemi, F. Massimo and A. Beck, `Azimuthal decomposition study of a realistic laser profile for efficient modeling of Laser WakeField Acceleration`, - `J. Phys.: Conf. Ser. 
1596, 012055 (2020) `_ + `Journal of Physics: Conference Series 1596, 012055 (2020) `_ .. [Massimo2020b] F. Massimo, I. Zemzemi, A. Beck, J. Derouillat and A. Specka, `Efficient cylindrical envelope modeling for laser wakefield acceleration`, - `J. Phys.: Conf. Ser. 1596, 012054 (2020) `_ + `Journal of Physics: Conference Series 1596, 012054 (2020) `_ `arXiv:1912.04674 `_ .. [Massimo2020a] F. Massimo, A. Beck, J. Derouillat, I. Zemzemi and A. Specka, `Numerical modeling of laser tunneling ionization in particle-in-cell codes with a laser envelope model`, - `Phys. Rev. E 102, 033204 (2020) `_ + `Physical Review E 102, 033204 (2020) `_ `arXiv:2006.04433 `_ .. [Marcowith2020] A. Marcowith, G. Ferrand, M. Grech, Z. Meliani, I. Plotnikov and R. Walder, `Multi-scale simulations of particle acceleration in astrophysical systems`, - `Living Rev Comput Astrophys 6, 1 (2020) `_ + `Living Reviews in Computational Astrophysics 6, 1 (2020) `_ `arXiv:2002.09411 `_ .. [Dargent2020] J. Dargent, N. Aunai, B. Lavraud, S. Toledo‐Redondo and F. Califano, `Simulation of Plasmaspheric Plume Impact on Dayside Magnetic Reconnection`, - `Geophys. Res. Lett. 47, 2019GL086546 (2020) `_ + `Geophysical Research Letters 47, 2019GL086546 (2020) `_ `arXiv:2002.02243 `_ .. [Sundström2020b] A. Sundström, L. Gremillet, E. Siminos and I. Pusztai, `Collisional effects on the electrostatic shock dynamics in thin-foil targets driven by an ultraintense short pulse laser`, - `Plasma Phys. Control. Fusion 62, 085015 (2020) `_ + `Plasma Physics and Controlled Fusion 62, 085015 (2020) `_ .. [Sundström2020a] A. Sundström, L. Gremillet, E. Siminos and I. Pusztai, `Fast collisional electron heating and relaxation in thin foils driven by a circularly polarized ultraintense short-pulse laser`, - `J. Plasma Phys. 86, 755860201 (2020) `_ + `Journal of Plasma Physics 86, 755860201 (2020) `_ `arXiv:1911.09562 `_ .. [Gelfer2020] E. G. Gelfer, A. M. Fedotov, O. Klimo and S. Weber, `Absorption and opacity threshold for a thin foil in a strong circularly polarized laser field`, - `Phys. Rev. E 101, 033204 (2020) `_ + `Physical Review E 101, 033204 (2020) `_ `arXiv:1906.05902 `_ .. [Ferri2020] J. Ferri, I. Thiele, E. Siminos, L. Gremillet, E. Smetanina, A. Dmitriev, G. Cantono, C.-G. Wahlström and T. Fülöp, `Enhancement of laser-driven ion acceleration in non-periodic nanostructured targets`, - `J. Plasma Phys. 86, 905860101 (2020) `_ + `Journal of Plasma Physics 86, 905860101 (2020) `_ `arXiv:1905.11131 `_ .. [Marques2019] J.-R. Marquès, L. Lancia, T. Gangolf, M. Blecher, S. Bolaños, J. Fuchs, O. Willi, F. Amiranoff, R. L. Berger, M. Chiaramello, S. Weber, and C. Riconda, `Joule-Level High-Efficiency Energy Transfer to Subpicosecond Laser Pulses by a Plasma-Based Amplifier`, - `Phys. Rev. X 9, 021008 (2019) `_ + `Physical Review X 9, 021008 (2019) `_ .. [Plotnikov2019] I. Plotnikov and L. Sironi, @@ -990,39 +1057,39 @@ Following is the distribution of these topics in the listed publications up to N X. S. Geng, L. L. Ji, B. F. Shen et al., `Quantum reflection above the classical radiation-reaction barrier in the quantum electro-dynamics regime`, - `Commun. Phys. 2, 66 (2019) `_ + `Communications Physics 2, 66 (2019) `_ .. [Sinha2019] U. Sinha, C. H. Keitel, and N. Kumar, `Polarized Light from the Transportation of a Matter-Antimatter Beam in a Plasma`, - `Phys. Rev. Lett. 122, 204801 (2019) `_ + `Physical Review Letters 122, 204801 (2019) `_ .. [Malko2019] S. Malko, X. Vaisseau, F. Perez, D. Batani, A. Curcio, M. Ehret, J. Honrubia, K. 
Jakubowska, A. Morace, J. J. Santos and L. Volpe, `Enhanced relativistic-electron beam collimation using two consecutive laser pulses`, - `Sci Rep 9, 14061 (2019) `_ + `Scientific Reports 9, 14061 (2019) `_ .. [Peng2019] H. Peng, C. Riconda, M. Grech, J.-Q. Su and S. Weber, `Nonlinear dynamics of laser-generated ion-plasma gratings: A unified description`, - `Phys. Rev. E 100, 061201 (2019) `_ + `Physical Review E 100, 061201 (2019) `_ `arXiv:1911.03440 `_ .. [Fang2019] J. Fang, C.-Y. Lu, J.-W. Yan and H. Yu, `Early acceleration of electrons and protons at the nonrelativistic quasiparallel shocks with different obliquity angles`, - `Res. Astron. Astrophys. 19, 182 (2019) `_ + `Research in Astronomy and Astrophysics 19, 182 (2019) `_ `arXiv:1908.08170 `_ .. [Yoon2019b] Y. Yoon and P. M. Bellan, `Kinetic Verification of the Stochastic Ion Heating Mechanism in Collisionless Magnetic Reconnection`, - `ApJ 887, L29 (2019) `_ + `The Astrophysical Journal Letters 887, L29 (2019) `_ .. [Yoon2019a] @@ -1034,7 +1101,7 @@ Following is the distribution of these topics in the listed publications up to N F. Massimo, A. Beck, J. Derouillat, M. Grech, M. Lobet, F. Pérez, I. Zemzemi and A Specka, `Efficient start-to-end 3D envelope modeling for two-stage laser wakefield acceleration experiments`, - `Plasma Phys. Control. Fusion 61, 124001 (2019) `_ + `Plasma Physics and Controlled Fusion 61, 124001 (2019) `_ `arXiv:1912.04127 `_ .. [Beck2019] @@ -1048,14 +1115,14 @@ Following is the distribution of these topics in the listed publications up to N F. Pérez and M. Grech, `Oblique-incidence, arbitrary-profile wave injection for electromagnetic simulations`, - `Phys. Rev. E 99, 033307 (2019) `_ + `Physical Review E 99, 033307 (2019) `_ `arXiv:1809.04435 `_ .. [Thiele2019] I. Thiele, E. Siminos and T. Fülöp, `Electron Beam Driven Generation of Frequency-Tunable Isolated Relativistic Subcycle Pulses`, - `Phys. Rev. Lett. 122, 104803 (2019) `_ + `Physical Review Letters 122, 104803 (2019) `_ `arXiv:1806.04976 `_ .. [Golovanov2018] @@ -1068,19 +1135,19 @@ Following is the distribution of these topics in the listed publications up to N S. Toledo-Redondo, J. Dargent, N. Aunai, B. Lavraud, M. André, W. Li, B. Giles, P.-A. Lindvist, R. E. Ergun, C. T. Russel and J. L. Burch, `Perpendicular Current Reduction Caused by Cold Ions of Ionospheric Origin in Magnetic Reconnection at the Magnetopause: Particle-in-Cell Simulations and Spacecraft Observations`, - `Geophys. Res. Lett. 45, 10,033 (2018) `_ + `Geophysical Research Letters 45, 10,033 (2018) `_ .. [Gelfer2018] E. Gelfer, N. Elkina and A. Fedotov, `Unexpected impact of radiation friction: enhancing production of longitudinal plasma waves`, - `Sci. Rep. 8, 6478 (2018) `_ + `Scientific Reports 8, 6478 (2018) `_ .. [Niel2018b] F. Niel, C. Riconda, F. Amiranoff, M. Lobet, J. Derouillat, F. Pérez, T. Vinci and M. Grech, `From quantum to classical modeling of radiation reaction: a focus on the radiation spectrum`, - `Plasma Phys. Control. Fusion 60, 094002 (2018) `_ + `Plasma Physics and Controlled Fusion 60, 094002 (2018) `_ `arXiv:1802.02927 `_ .. [Plotnikov2018] @@ -1094,21 +1161,21 @@ Following is the distribution of these topics in the listed publications up to N F. Niel, C. Riconda, F. Amiranoff, R. Duclous and M. Grech, `From quantum to classical modeling of radiation reaction: A focus on stochasticity effects`, - `Phys. Rev. E 97, 043209 (2018) `_ + `Physical Review E 97, 043209 (2018) `_ `arXiv:1707.02618 `_ .. [Grassi2017b] A. Grassi, M. Grech, F. Amiranoff, A. 
Macchi and C. Riconda, `Radiation-pressure-driven ion Weibel instability and collisionless shocks`, - `Phys. Rev. E 96, 033204 (2017) `_ + `Physical Review E 96, 033204 (2017) `_ `arXiv:1705.05402 `_ .. [Fedeli2017] L. Fedeli, A. Formenti, L. Cialfi, A. Sgattoni, G. Cantono and M. Passoni, `Structured targets for advanced laser-driven sources`, - `Plasma Phys. Control. Fusion 60, 014013 (2017) `_ + `Plasma Physics and Controlled Fusion 60, 014013 (2017) `_ .. [Golovanov2017] @@ -1120,19 +1187,19 @@ Following is the distribution of these topics in the listed publications up to N J. Dargent, N. Aunai, B. Lavraud, S. Toledo-Redondo, M. A. Shay, P. A. Cassak and K. Malakit, `Kinetic simulation of asymmetric magnetic reconnection with cold ions`, - `J. Geophys. Res. Space Physics 122, 5290-5306 (2017) `_ + `Journal of Geophysical Research: Space Physics 122, 5290-5306 (2017) `_ .. [Grassi2017a] A. Grassi, M. Grech, F. Amiranoff, F. Pegoraro, A. Macchi and C. Riconda, `Electron Weibel instability in relativistic counterstreaming plasmas with flow-aligned external magnetic fields`, - `Phys. Rev. E 95, 023203 (2017) `_ + `Physical Review E 95, 023203 (2017) `_ .. [Dargent2016] J. Dargent, N. Aunai, G. Belmont, N. Dorville, B. Lavraud and M. Hesse, `Full particle-in-cell simulations of kinetic equilibria and the role of the initial current sheet on steady asymmetric magnetic reconnection`, - `J. Plasma Phys. 82, 905820305 (2016) `_ + `Journal of Plasma Physics 82, 905820305 (2016) `_ .. [Chiaramello2016] @@ -1144,10 +1211,10 @@ Following is the distribution of these topics in the listed publications up to N A. Beck, J.T. Frederiksen and J. Dérouillat, `Load management strategy for Particle-In-Cell simulations in high energy particle acceleration`, - `Nucl. Inst. Meth. in Phys. Res. A 829, 418-421 (2016) `_ + `Nuclear Instruments and Methods in Physics Research A 829, 418-421 (2016) `_ .. [Lancia2016] L. Lancia, A. Giribono, L. Vassura, M. Chiaramello, C. Riconda, S. Weber, A. Castan, A. Chatelain, A. Frank, T. Gangolf, M. N. Quinn, J. Fuchs and J.-R. Marquès, `Signatures of the Self-Similar Regime of Strongly Coupled Stimulated Brillouin Scattering for Efficient Short Laser Pulse Amplification`, - `Phys. Rev. Lett.
116, 075001 (2016) `_ + `Physical Review Letters 116, 075001 (2016) `_ diff --git a/doc/Sphinx/Overview/partners.rst b/doc/Sphinx/Overview/partners.rst index 69b87e746..87d9d978a 100755 --- a/doc/Sphinx/Overview/partners.rst +++ b/doc/Sphinx/Overview/partners.rst @@ -52,11 +52,11 @@ Partners | | `Maison de la Simulation `_ (MdlS), USR 3441 | | | | + +---------------------------------------------------------------------------------------------------------+ -| | * `Olga Abramkina `_ | -| | * `Julien Dérouillat `_ | +| | * `Olga Abramkina `_ (Developer) | +| | * `Julien Dérouillat `_ (Cofounder) | | | * `Haithem Kallala `_ | -| | * `Mathieu Lobet `_ | -| | * `Charles Prouveur `_ | +| | * `Mathieu Lobet `_ (Developer) | +| | * `Charles Prouveur `_ (Architect) | | | | +------------+---------------------------------------------------------------------------------------------------------+ @@ -67,11 +67,11 @@ Partners | | `Laboratoire pour l'Utilisation des Lasers Intenses `_ (LULI), UMR 7605 | | | | + +-------------------------------------------------------------------------------------------------------------+ -| | * `Mickael Grech `_ | -| | * `Tommaso Vinci `_ | +| | * `Mickael Grech `_ (Founder) | +| | * `Tommaso Vinci `_ (Developer) | | | * `Marco Chiaramello `_ | | | * `Anna Grassi `_ | -| | * `Frédéric Pérez `_ | +| | * `Frédéric Pérez `_ (Community manager, Developer) | | | * `Caterina Riconda `_ | | | | +------------+-------------------------------------------------------------------------------------------------------------+ @@ -83,9 +83,9 @@ Partners | | `Laboratoire Leprince-Ringuet `_ (LLR), UMR 7638 | + +---------------------------------------------------------------------------------------------------------+ | | | -| | * `Arnaud Beck `_ | +| | * `Arnaud Beck `_ (Project Coordinator, Cofounder, Developer) | | | * `Imen Zemzemi `_ | -| | * `Guillaume Bouchard `_ | +| | * `Guillaume Bouchard `_ (Developer) | +------------+---------------------------------------------------------------------------------------------------------+ .. rst-class:: noborder @@ -95,7 +95,7 @@ Partners | | `Laboratoire de Physique des Gaz et des Plasmas `_ (LPGP), UMR 8578 | + +----------------------------------------------------------------------------------------------------------------------+ | | | -| | * `Francesco Massimo `_ | +| | * `Francesco Massimo `_ (Developer) | +------------+----------------------------------------------------------------------------------------------------------------------+ .. rst-class:: noborder @@ -105,7 +105,7 @@ Partners | | `Institut du developpement et des ressources en informatique scientifique `_ (IDRIS), UPS 851 | + +----------------------------------------------------------------------------------------------------------------------+ | | | -| | * `Olga Abramkina `_ | +| | * `Olga Abramkina `_ (Developer) | | | * `Marie Flé `_ | +------------+----------------------------------------------------------------------------------------------------------------------+ diff --git a/doc/Sphinx/Overview/releases.rst b/doc/Sphinx/Overview/releases.rst index 8027d2f3d..e271b32c5 100755 --- a/doc/Sphinx/Overview/releases.rst +++ b/doc/Sphinx/Overview/releases.rst @@ -16,22 +16,53 @@ Get Smilei You can find older, `unsupported versions here `_ +.. +.. ---- + +.. .. _latestVersion: + +.. Changes made in the repository (not released) +.. ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + ---- -.. 
_latestVersion: +Ongoing projects +^^^^^^^^^^^^^^^^ + +* Already available, but experimental: + + * Particle merging + * Nuclear reactions + * Perfectly Matched Layers + * NewParticles diagnostic + +* In preparation: + + * Spectral solvers + -Changes made in the repository (not released) -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +---- + +Release 5.1 +^^^^^^^^^^^^^^^^^^^^^ -* GPU: +* **GPU**: + * ``1Dcartesian`` geometry now available. * Compilation simplified and better documented. + * Improved performance of particle sorting. -* Features: +* **Features**: * Relativistic field initialization now supports multiple species and both direction propagations. + * Added the argument ``phase_offset`` in laser definitions such as ``LaserGaussian2D``. + * The ``LaserGaussianAM`` definition will only use one coordinate for its ``focus`` argument + (the transverse coordinate of the focus in this geometry is zero). + * Small improvements in PML for envelope model (AM and 2D). + * Deprecated ``smilei_rand_max``. + * New namelist variables ``smilei_omp_threads`` and ``smilei_total_cores``. -* Happi: +* **Happi**: * In ``Scalar``, it is now possible to make an operation on scalars such as ``"Uelm+Ukin"``. * The list of available scalars can be obtained from ``getScalars()``. @@ -40,16 +71,11 @@ Changes made in the repository (not released) * Changed coordinate reference for 2D probe in 3D or AM geometry (zero is the box origin projected orthogonally on the probe plane). -* Documentation: +* **Documentation**: * Dark theme (click the switch on the bottom left, or set browser preferences). -* Added the argument ``phase_offset`` in laser definitions such as ``LaserGaussian2D``. -* The ``LaserGaussianAM`` definition will only use one coordinate for its ``focus`` argument - (the transverse coordinate of the focus in this geometry is zero). -* Small improvements in PML for envelope model (AM and 2D). - -* Bug fixes: +* **Bug fixes**: * ``dump_minutes`` often failed to write some checkpoint files. * ``"auto"`` limits in ``ParticleBinning`` could fail with only one side on ``"auto"``. @@ -57,23 +83,6 @@ ---- -Projects -^^^^^^^^^^^^^^^^ - -* Already available, but experimental: - - * Particle merging - * Nuclear reactions - * Perfectly Matched Layers - * NewParticles diagnostic - -* In preparation: - - * Spectral solvers - - ----- - Release 5.0 ^^^^^^^^^^^^^^^^^^^^^ diff --git a/doc/Sphinx/Use/namelist.rst b/doc/Sphinx/Use/namelist.rst index ad318954c..a07f19005 100755 --- a/doc/Sphinx/Use/namelist.rst +++ b/doc/Sphinx/Use/namelist.rst @@ -60,7 +60,8 @@ for each MPI process). The following steps are executed: * The rank of the current MPI process as :py:data:`smilei_mpi_rank`. * The total number of MPI processes as :py:data:`smilei_mpi_size`. - * The maximum random integer as :py:data:`smilei_rand_max`. + * The number of OpenMP threads per MPI process as :py:data:`smilei_omp_threads`. + * The total number of cores as :py:data:`smilei_total_cores`. #. The namelist(s) is executed. @@ -1147,6 +1148,9 @@ Each species has to be defined in a ``Species`` block:: :ref:`tracking `. The available fields are ``"Ex"``, ``"Ey"``, ``"Ez"``, ``"Bx"``, ``"By"`` and ``"Bz"``. + Note that magnetic field components, as they originate from the interpolator, + are shifted by half a timestep compared to those from the *Fields* diagnostics. + Additionally, the work done by each component of the electric field is available as ``"Wx"``, ``"Wy"`` and ``"Wz"``.
Contrary to the other interpolated fields, these quantities are accumulated over time. @@ -2715,7 +2719,8 @@ or several points arranged in a 2-D or 3-D grid. * **In "AMcylindrical" geometry**, probes are defined with 3D Cartesian coordinates and cannot be separated per mode. Use Field diagnostics for cylindrical coordinates and information per mode. - + * **Probes rely on the particle interpolator to compute fields** so that the + magnetic field is shifted by half a timestep compared to that of *Fields* diagnostics. To add one probe diagnostic, include the block ``DiagProbe``:: @@ -3342,19 +3347,20 @@ for instance:: def my_filter(particles): return (particles.px>-1.)*(particles.px<1.) + (particles.pz>3.) -.. Warning:: The ``px``, ``py`` and ``pz`` quantities are not exactly the momenta. - They are actually the velocities multiplied by the lorentz factor, i.e., - :math:`\gamma v_x`, :math:`\gamma v_y` and :math:`\gamma v_z`. This is true only - inside the ``filter`` function (not for the output of the diagnostic). - -.. Note:: The ``id`` attribute contains the :doc:`particles identification number`. - This number is set to 0 at the beginning of the simulation. **Only after particles have - passed the filter**, they acquire a positive ``id``. - -.. Note:: For advanced filtration, Smilei provides the quantity ``Main.iteration``, - accessible within the ``filter`` function. Its value is always equal to the current - iteration number of the PIC loop. The current time of the simulation is thus - ``Main.iteration * Main.timestep``. +.. Note:: + + * In the ``filter`` function only, the ``px``, ``py`` and ``pz`` quantities + are not exactly the momenta. + They are actually the velocities multiplied by the Lorentz factor, i.e., + :math:`\gamma v_x`, :math:`\gamma v_y` and :math:`\gamma v_z`. + This is *not* true for the output of the diagnostic. + * The ``id`` attribute contains the :doc:`particles identification number`. + This number is set to 0 at the beginning of the simulation. **Only after particles have + passed the filter**, they acquire a positive ``id``. + * For advanced filtration, Smilei provides the quantity ``Main.iteration``, + accessible within the ``filter`` function. Its value is always equal to the current + iteration number of the PIC loop. The current time of the simulation is thus + ``Main.iteration * Main.timestep``. .. py:data:: attributes @@ -3367,6 +3373,11 @@ for instance:: (``"chi"``, only for species with radiation losses) or the fields interpolated at their positions (``"Ex"``, ``"Ey"``, ``"Ez"``, ``"Bx"``, ``"By"``, ``"Bz"``). +.. Note:: Here, interpolated fields are normally computed after the Maxwell solver. + They may thus differ by half a timestep from those computed at the middle of the + timestep to push particles. When exact values are needed, use the option + :py:data:`keep_interpolated_fields`. + ---- .. rst-class:: experimental @@ -3619,9 +3630,15 @@ namelist. They should not be re-defined by the user! The total number of MPI processes. -.. - <> - .. py:data:: smilei_rand_max +.. py:data:: smilei_omp_threads + + The number of OpenMP threads per MPI process. + +.. py:data:: smilei_total_cores - The largest random integer. + The total number of cores. +.. note:: + + These variables can be accessed during ``happi`` post-processing, e.g. + ``S.namelist.smilei_mpi_size``.
\ No newline at end of file diff --git a/doc/Sphinx/implementation.rst b/doc/Sphinx/implementation.rst index 46bf953e9..aab91c2c9 100644 --- a/doc/Sphinx/implementation.rst +++ b/doc/Sphinx/implementation.rst @@ -10,10 +10,10 @@ and conveniency for non-advanced C++ users. The repository is composed of the following directories: - ``Licence``: contains code licence information -- ``doc``: conatins the Sphinx doc files +- ``doc``: contains the Sphinx doc files - ``src``: contains all source files - ``happi``: contains the sources of the happi Python tool for visualization -- ``benchmarks``: contains the benchmarks used by the validation process. these becnhamrks are also examples for users. +- ``benchmarks``: contains the benchmarks used by the validation process; these benchmarks are also examples for users. - ``scripts``: contains multiple tool scripts for compilation and more - ``compile_tools``: contains scripts and machine files used by the makefile for compilation @@ -23,7 +23,7 @@ The repository is composed of the following directories: The source files directory is as well composed of several sub-directories to organise the `.cpp` and `.h` files by related thematics. The main is the file `Smilei.cpp`. -There is always only one class definition per file and the file name correcponds to the class name. +There is always only one class definition per file and the file name corresponds to the class name. The general implementation is later summarized in :numref:`smilei_main_loop` @@ -54,10 +54,10 @@ Notion of operators An operator is a class that operates on input data to provide a processed information. Input data can be parameters and data containers. Output data can be processed data from data containers or updated data containers. -An operator is a class functor (overloadind of the ``()`` ). -Sometime, operator provides additional methods called wrappers to provide differents simplified or adapted interfaces. -An operator do not store data or temporarely. -for instance, the particle interpolation, push and proection are operators. +An operator is a class functor (overloading of the ``()`` ). +Sometimes, an operator provides additional methods called wrappers to provide different simplified or adapted interfaces. +An operator does not store data, even temporarily. +For instance, the particle interpolation, push and projection are operators. .. _operator: @@ -71,7 +71,7 @@ Notion of domain parts Domain parts are classes that represents some specific levels of the domain decomposition. They can be seen as high-level data container or container of data container. -They contain some methods to handle, manange and access the local data. +They contain some methods to handle, manage and access the local data. For instance, patches and ``Species`` are domain parts: - ``Species`` contains the particles. @@ -80,10 +80,10 @@ For instance, patches and ``Species`` are domain parts: Notion of factory ------------------------------------ -Some objects such as operators or data containers have sereral variations. +Some objects such as operators or data containers have several variations. For this we use inheritance. A base class is used for common parameters and methods and derived classes are used for all variations. -The factory uses user-defined input parameters to determine the right derive class to choose and initiate them as shown in :numref:`factory`. +The factory uses user-defined input parameters to determine the right derived class to choose and instantiate them as shown in :numref:`factory`.
For instance, there are several ``push`` operators implemented all derived from a base ``push`` class. The ``push`` factory will determine the right one to use. @@ -97,7 +97,7 @@ The ``push`` factory will determine the right one to use. Other ------------------------------------ -Some classes are used for specific actions in the code such as the initilization process. +Some classes are used for specific actions in the code such as the initialization process. ----------------------------------------------------------------- @@ -106,7 +106,7 @@ III. Domain decomposition and parallelism The simulation domain is divided multiple times following a succession of decomposition levels. The whole domain is the superimposition of different grids for each electromagnetic field component -and macro-particules. +and macro-particles. Let us represent schematically the domain as an array of cells as in Fig. :numref:`full_domain`. Each cell contains a certain population of particles (that can differ from cell to cell). @@ -127,8 +127,8 @@ The domain becomes a collection of patches as shown in :numref:`patch_domain_dec The domain in :program:`Smilei` is a collection of patches. -A patch is an independant piece of the whole simulation domain. -It therefore owns local electrmognatic grids and list of macro-particles. +A patch is an independent piece of the whole simulation domain. +It therefore owns the local electromagnetic grids and list of macro-particles. Electromagnetic grids have ghost cells that represent the information located in the neighboring patches (not shown in :numref:`patch_domain_decomposition`). All patches have the same spatial size .i.e. the same number of cells. The size of a patch is calculated so that all local field grids (ghost cells included) can fit in L2 cache. @@ -144,7 +144,7 @@ The distribution can be ensured in an equal cartesian way or using a load balanc Patches are then distributed among MPI processes in so-called MPI patch collections. Inside MPI patch collection, OpenMP loop directives are used to distribute the computation of the patches among the available threads. -Since each patch have a different number of particles, this approach enables a dynamic scheduling depending on the specified OpenMP scheduler. +Since each patch has a different number of particles, this approach enables a dynamic scheduling depending on the specified OpenMP scheduler. As shown in :numref:`smilei_main_loop`, a synchronization step is required to exchange grid ghost cells and particles traveling from patch to patch. The patch granularity is used for: @@ -163,7 +163,7 @@ The patch can be decomposed into bins as shown in :numref:`bin_decomposition`. Bin decomposition. -Contrary to patch, a bin is not an independant data structure with its own arrays. +Contrary to patch, a bin is not an independent data structure with its own arrays. It represents a smaller portion of the patch grids through specific start and end indexes. For the macro-particles, a sorting algorithm is used to ensure that in the macro-particles located in the same bin are grouped and contiguous in memory. @@ -288,7 +288,7 @@ located in the file `src/Tools.h`. - `ERROR_NAMELIST`: this function should be used for namelist error. It takes in argument a simple message and a link to the documentation. It throws as well a SIGABRT signal. - `MESSAGE`: this function should be used to output an information message (it uses `std::cout`). 
- `DEBUG` : should be used for debugging messages (for the so-called DEBUG mode) -- `WARNING` : should be used to thrown a warning. A warning alerts the users of a possible issue or to be carreful with some parameters without stoping the program. +- `WARNING` : should be used to throw a warning. A warning alerts the user of a possible issue or advises caution with some parameters, without stopping the program. -------------------------------------------------------------------------------- @@ -547,7 +547,7 @@ file ``Smilei.cpp`` thought calls to different ``vecPatches`` methods. .. code-block:: c++ - vecPatches.finalizeAndSortParticles( params, &smpi, simWindow, + vecPatches.finalizeExchParticlesAndSort( params, &smpi, simWindow, time_dual, timers, itime ); * **Particle merging**: merging process for particles (still experimental) @@ -618,7 +618,7 @@ We first loop on the patches and then the species of each patch ``ipatch``: ``(*this )( ipatch )->vecSpecies.size()``. For each species, the method ``Species::dynamics`` is called to perform the dynamic step of the respective particles. -The OpenMP parallelism is explicitely applied in ``vecPatches::dynamics`` on the patch loop as shown +The OpenMP parallelism is explicitly applied in ``vecPatches::dynamics`` on the patch loop as shown in the following pieces of code. .. code-block:: c++ diff --git a/doc/Sphinx/smilei_theme/layout.html b/doc/Sphinx/smilei_theme/layout.html index 592f7e532..1bed82e81 100755 --- a/doc/Sphinx/smilei_theme/layout.html +++ b/doc/Sphinx/smilei_theme/layout.html @@ -97,7 +97,7 @@
diff --git a/doc/Sphinx/smilei_theme/static/smilei_theme.css_t b/doc/Sphinx/smilei_theme/static/smilei_theme.css_t index fdf918810..4de1a7428 100755 --- a/doc/Sphinx/smilei_theme/static/smilei_theme.css_t +++ b/doc/Sphinx/smilei_theme/static/smilei_theme.css_t @@ -172,6 +172,10 @@ a:hover { text-decoration: underline; } +a:visited { + color:{{ theme_main_color_bold }}; color: var(--main_bold); +} + div.body h1, div.body h2, div.body h3, @@ -402,6 +406,14 @@ table.footnote td { padding: 0.3em 0.5em; } +table.noborder { + width: 100%; +} + +table.noborder tr:first-child td:first-child { + width: 7em; +} + table.noborder, table.noborder td { border:0 !important; } diff --git a/happi/_Diagnostics/TrackParticles.py b/happi/_Diagnostics/TrackParticles.py index 253bb2958..0825eb0f3 100755 --- a/happi/_Diagnostics/TrackParticles.py +++ b/happi/_Diagnostics/TrackParticles.py @@ -447,8 +447,9 @@ def _orderFiles( self, fileOrdered, chunksize, sort ): for k, name in self._short_properties_from_raw.items(): if k not in group: continue ordered = self._np.empty((nparticles_to_write, ), dtype=group[k].dtype) - if k == "id": ordered.fill(0) - else : ordered.fill(self._np.nan) + if k == "id" : ordered.fill(0) + elif k == "charge": ordered.fill(9999) + else : ordered.fill(self._np.nan) ordered[locs] = group[k][()][selectedIndices] f0[name].write_direct(ordered, dest_sel=self._np.s_[it,:]) @@ -461,8 +462,9 @@ def _orderFiles( self, fileOrdered, chunksize, sort ): for first_o, last_o, npart_o in ChunkedRange(nparticles_to_write, chunksize): for k, name in self._short_properties_from_raw.items(): if k not in group: continue - if k == "id": data[k].fill(0) - else : data[k].fill(self._np.nan) + if k == "id" : data[k].fill(0) + elif k == "charge": data[k].fill(9999) + else : data[k].fill(self._np.nan) # Loop chunks of the input for first_i, last_i, npart_i in ChunkedRange(nparticles, chunksize): # Obtain IDs @@ -538,7 +540,10 @@ def _generateRawData(self, times=None): data[it,:] -= self._XmovedForTime[time] else: data = self._readUnstructuredH5(self._h5items[axis], self.selectedParticles, first_time, last_time) - data[deadParticles] = self._np.nan + if data.dtype == float: + data[deadParticles] = self._np.nan + else: + data[deadParticles] = 9999 self._rawData[axis] = data if self._verbose: print("Process broken lines ...") diff --git a/happi/_Utils.py b/happi/_Utils.py index 9fd35a757..070046786 100755 --- a/happi/_Utils.py +++ b/happi/_Utils.py @@ -42,7 +42,10 @@ def updateMatplotLibColormaps(): if "smilei" in matplotlib.pyplot.colormaps(): return def register(name, d): cmap = matplotlib.colors.LinearSegmentedColormap(name, d, N=256, gamma=1.0) - matplotlib.pyplot.register_cmap(cmap=cmap) + try: + matplotlib.pyplot.register_cmap(cmap=cmap) + except Exception as e: + matplotlib.colormaps.register(cmap) register(u"smilei", { 'red' :((0., 0., 0.), (0.0625 , 0.091, 0.091), (0.09375, 0.118, 0.118), (0.125 , 0.127, 0.127), (0.1875 , 0.135, 0.135), (0.21875, 0.125, 0.125), (0.28125, 0.034, 0.034), (0.3125 , 0.010, 0.010), (0.34375, 0.009, 0.009), (0.4375 , 0.049, 0.049), (0.46875, 0.057, 0.057), (0.5 , 0.058, 0.058), (0.59375, 0.031, 0.031), (0.625 , 0.028, 0.028), (0.65625, 0.047, 0.047), (0.71875, 0.143, 0.143), (0.78125, 0.294, 0.294), (0.84375, 0.519, 0.519), (0.90625, 0.664, 0.664), (0.9375 , 0.760, 0.760), (0.96875, 0.880, 0.880), (1., 1., 1. 
)), 'green':((0., 0., 0.), (0.21875, 0.228, 0.228), (0.78125, 0.827, 0.827), (0.8125 , 0.852, 0.852), (0.84375, 0.869, 0.869), (0.9375 , 0.937, 0.937), (0.96875, 0.967, 0.967), (1. , 1. , 1. )), @@ -398,7 +401,11 @@ def __init__(self, operation, QuantityTranslator, ureg): raise Exception("Quantity "+q+" not understood") # Calculate the total units and its inverse locals().update(self.imports) - units = eval("".join(basic_op)).units + units = eval("".join(basic_op)) + if isinstance(units, (int, float)): + units = ureg.Quantity(1) # dimensionless + else: + units = units.units self.translated_units = units.format_babel(locale="en") # Make the operation string self.translated_operation = "".join(full_op) diff --git a/makefile b/makefile index 3aaff0201..277a2237d 100755 --- a/makefile +++ b/makefile @@ -52,7 +52,7 @@ DIRS := $(shell find src -type d) SRCS := $(shell find src/* -name \*.cpp) OBJS := $(addprefix $(BUILD_DIR)/, $(SRCS:.cpp=.o)) DEPS := $(addprefix $(BUILD_DIR)/, $(SRCS:.cpp=.d)) -SITEDIR = $(shell $(PYTHONEXE) -c 'import site; site._script()' --user-site) +SITEDIR = $(shell d=`$(PYTHONEXE) -m site --user-site` && echo $$d || $(PYTHONEXE) -c "import sysconfig; print(sysconfig.get_path('purelib'))") # Smilei tools TABLES_DIR := tools/tables @@ -202,9 +202,9 @@ endif ifneq (,$(call parse_config,gpu_nvidia)) override config += noopenmp # Prevent openmp for nvidia - CXXFLAGS += -DSMILEI_ACCELERATOR_MODE -DSMILEI_OPENACC_MODE + CXXFLAGS += -DSMILEI_ACCELERATOR_GPU -DSMILEI_ACCELERATOR_GPU_OACC GPU_COMPILER ?= nvcc - GPU_COMPILER_FLAGS += -x cu -DSMILEI_ACCELERATOR_MODE -DSMILEI_OPENACC_MODE $(DIRS:%=-I%) + GPU_COMPILER_FLAGS += -x cu -DSMILEI_ACCELERATOR_GPU -DSMILEI_ACCELERATOR_GPU_OACC $(DIRS:%=-I%) GPU_COMPILER_FLAGS += -I$(BUILD_DIR)/src/Python $(PY_CXXFLAGS) GPU_KERNEL_SRCS := $(shell find src/* -name \*.cu) GPU_KERNEL_OBJS := $(addprefix $(BUILD_DIR)/, $(GPU_KERNEL_SRCS:.cu=.o)) @@ -214,9 +214,9 @@ endif # AMD GPUs ifneq (,$(call parse_config,gpu_amd)) - CXXFLAGS += -DSMILEI_ACCELERATOR_MODE + CXXFLAGS += -DSMILEI_ACCELERATOR_GPU -DSMILEI_ACCELERATOR_GPU_OMP GPU_COMPILER ?= $(CC) - GPU_COMPILER_FLAGS += -x hip -DSMILEI_ACCELERATOR_MODE -std=c++14 $(DIRS:%=-I%) #$(PY_FLAGS) + GPU_COMPILER_FLAGS += -x hip -DSMILEI_ACCELERATOR_GPU -DSMILEI_ACCELERATOR_GPU_OMP -std=c++14 $(DIRS:%=-I%) GPU_COMPILER_FLAGS += -I$(BUILD_DIR)/src/Python $(PY_CXXFLAGS) GPU_KERNEL_SRCS := $(shell find src/* -name \*.cu) GPU_KERNEL_OBJS := $(addprefix $(BUILD_DIR)/, $(GPU_KERNEL_SRCS:.cu=.o)) diff --git a/scripts/compile_tools/machine/adastra b/scripts/compile_tools/machine/adastra index 7aab184ce..14c2a975a 100644 --- a/scripts/compile_tools/machine/adastra +++ b/scripts/compile_tools/machine/adastra @@ -85,7 +85,6 @@ ADASTRA_DEBUG_FLAGS := -g -ggdb $(ADASTRA_DEBUG_SANITIZER_FLAGS) -v # ifneq (,$(call parse_config,gpu_amd)) # When using OMP - ADASTRA_ACCELERATOR_GPU_OMP_DEFINE_FLAGS := -DSMILEI_ACCELERATOR_GPU_OMP=1 # ADASTRA_ACCELERATOR_GPU_TARGET := gfx908 # ADASTRA_ACCELERATOR_GPU_TARGET := gfx908:xnack- diff --git a/scripts/compile_tools/machine/jean_zay_gpu_V100 b/scripts/compile_tools/machine/jean_zay_gpu_V100 index 7fa7ce513..cc9d15c8b 100644 --- a/scripts/compile_tools/machine/jean_zay_gpu_V100 +++ b/scripts/compile_tools/machine/jean_zay_gpu_V100 @@ -5,12 +5,25 @@ # Documentation: # http://www.idris.fr/jean-zay # +# Use the following commented commands to have the proper environment for compilation and running +# +# module purge +# module load anaconda-py3/2020.11 +# module load 
nvidia-compilers/23.11 +# module load cuda/12.2.0 +# module load openmpi/4.1.5-cuda +# module load hdf5/1.12.0-mpi-cuda +# export HDF5_ROOT_DIR=/gpfslocalsup/spack_soft/hdf5/1.12.0/nvhpc-23.11-i5lyakq3iu254ru3eqe2yukvg7airopl +# export I_MPI_CXX=pgc++ +# export SMILEICXX=mpic++ +# export CICCFLAG="--c++14" + SMILEICXX_DEPS = g++ #GPU_COMPILER = nvcc CXXFLAGS += -w -CXXFLAGS += -ta=tesla:cc70 -std=c++14 -lcurand -Minfo=accel # what is offloaded/copied +CXXFLAGS += -acc=gpu -gpu=cc70 -std=c++14 -lcurand -Minfo=accel # what is offloaded/copied # CXXFLAGS += -Minfo=all # very verbose output CXXFLAGS += -D__GCC_ATOMIC_TEST_AND_SET_TRUEVAL=1 @@ -18,4 +31,4 @@ CXXFLAGS += -D__GCC_ATOMIC_TEST_AND_SET_TRUEVAL=1 GPU_COMPILER_FLAGS += -O3 --std c++14 -arch=sm_70 GPU_COMPILER_FLAGS += --expt-relaxed-constexpr -LDFLAGS += -ta=tesla:cc70 -std=c++14 -Mcudalib=curand -lcudart -lcurand -lacccuda +LDFLAGS += -acc=gpu -gpu=cc70 -std=c++14 -cudalib=curand -lcudart -lcurand -lacccuda diff --git a/scripts/compile_tools/machine/ruche_gpu2 b/scripts/compile_tools/machine/ruche_gpu2 index a9406d60d..80cf09198 100644 --- a/scripts/compile_tools/machine/ruche_gpu2 +++ b/scripts/compile_tools/machine/ruche_gpu2 @@ -26,7 +26,7 @@ GPU_COMPILER_FLAGS += -arch=sm_80 #sm_89 # first compile completely with sm_80 t CXXFLAGS += -Minfo=accel # what is offloaded/copied # CXXFLAGS += -Minfo=all # very verbose output -# To turn on the OpenMP support, uncomment these 3 lines and comment the line just above defining 'SMILEI_OPENACC_MODE' +# To turn on the OpenMP support, uncomment these 3 lines and comment the line just above defining 'SMILEI_ACCELERATOR_GPU_OACC' # CXXFLAGS += -mp=gpu -DSMILEI_ACCELERATOR_GPU_OMP # GPU_COMPILER_FLAGS += -DSMILEI_ACCELERATOR_GPU_OMP # Can't we pass the -mp=gpu to nvcc when compiling a .cu file ? # LDFLAGS += -mp=gpu diff --git a/src/Checkpoint/Checkpoint.cpp b/src/Checkpoint/Checkpoint.cpp index 13c3d28a5..3cbb6c12a 100755 --- a/src/Checkpoint/Checkpoint.cpp +++ b/src/Checkpoint/Checkpoint.cpp @@ -233,7 +233,7 @@ void Checkpoint::dumpAll( VectorPatch &vecPatches, Region ®ion, unsigned int MESSAGE( " Checkpoint #" << num_dump << " at iteration " << itime << " dumped" ); #endif -#if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined( SMILEI_ACCELERATOR_GPU_OACC ) MESSAGE( " Copying device data in main memory" ); // TODO(Etienne M): This may very well be redundant if we did a diagnostic // during the last iteration. 
Indeed, we copy everything from the device to @@ -478,8 +478,8 @@ void Checkpoint::dumpPatch( Patch *patch, Params ¶ms, H5Write &g ) name << setfill( '0' ) << setw( 2 ) << bcId; string groupName=Tools::merge( "EM_boundary-species-", name.str() ); H5Write b = g.group( groupName ); - b.attr( "By_val", embc->By_val ); - b.attr( "Bz_val", embc->Bz_val ); + b.attr( "By_val", embc->By_val_ ); + b.attr( "Bz_val", embc->Bz_val_ ); } else if( dynamic_cast( EMfields->emBoundCond[bcId] ) ) { ElectroMagnBC2D_SM *embc = static_cast( EMfields->emBoundCond[bcId] ); ostringstream name( "" ); @@ -889,8 +889,8 @@ void Checkpoint::restartPatch( Patch *patch, Params ¶ms, H5Read &g ) name << setfill( '0' ) << setw( 2 ) << bcId; string groupName = Tools::merge( "EM_boundary-species-", name.str() ); H5Read b = g.group( groupName ); - b.attr( "By_val", embc->By_val ); - b.attr( "Bz_val", embc->Bz_val ); + b.attr( "By_val", embc->By_val_ ); + b.attr( "Bz_val", embc->Bz_val_ ); } else if( dynamic_cast( EMfields->emBoundCond[bcId] ) ) { ElectroMagnBC2D_SM *embc = static_cast( EMfields->emBoundCond[bcId] ); ostringstream name( "" ); diff --git a/src/Diagnostic/DiagnosticProbes.cpp b/src/Diagnostic/DiagnosticProbes.cpp index 5e79eecc9..e66c684e7 100755 --- a/src/Diagnostic/DiagnosticProbes.cpp +++ b/src/Diagnostic/DiagnosticProbes.cpp @@ -740,7 +740,7 @@ void DiagnosticProbes::run( SmileiMPI *smpi, VectorPatch &vecPatches, int itime, // Interpolate all usual fields on probe ("fake") particles of current patch unsigned int iPart_MPI = offset_in_MPI[ipatch]; unsigned int maxPart_MPI = offset_in_MPI[ipatch] + npart; -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) smpi->resizeDeviceBuffers( ithread, nDim_particle, npart ); diff --git a/src/Diagnostic/DiagnosticScalar.cpp b/src/Diagnostic/DiagnosticScalar.cpp index fe88f47d9..9b8b17409 100755 --- a/src/Diagnostic/DiagnosticScalar.cpp +++ b/src/Diagnostic/DiagnosticScalar.cpp @@ -436,7 +436,7 @@ void DiagnosticScalar::compute( Patch *patch, int ) const unsigned int nPart=vecSpecies[ispec]->getNbrOfParticles(); // number of particles -// #if defined( SMILEI_ACCELERATOR_MODE ) +// #if defined( SMILEI_ACCELERATOR_GPU ) const double *const __restrict__ weight_ptr = vecSpecies[ispec]->particles->getPtrWeight(); const short *const __restrict__ charge_ptr = vecSpecies[ispec]->particles->getPtrCharge(); const double *const __restrict__ momentum_x = vecSpecies[ispec]->particles->getPtrMomentum(0); @@ -447,14 +447,14 @@ void DiagnosticScalar::compute( Patch *patch, int ) if( vecSpecies[ispec]->mass_ > 0 ) { // GPU mode -#ifdef SMILEI_ACCELERATOR_MODE +#ifdef SMILEI_ACCELERATOR_GPU #if defined( SMILEI_ACCELERATOR_GPU_OMP ) #pragma omp target teams distribute parallel for \ map(tofrom: density) \ is_device_ptr(weight_ptr) \ reduction(+:density) -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel deviceptr(weight_ptr) #pragma acc loop gang worker vector reduction(+:density) #endif @@ -468,7 +468,7 @@ void DiagnosticScalar::compute( Patch *patch, int ) map(tofrom: charge) \ is_device_ptr( charge_ptr, weight_ptr) \ reduction(+:charge) -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel deviceptr(weight_ptr, charge_ptr) #pragma acc loop gang worker vector reduction(+:charge) #endif @@ -484,7 +484,7 @@ void DiagnosticScalar::compute( Patch *patch, int ) momentum_y /* [istart:particle_number] */, \ momentum_z /* [istart:particle_number] */) \ 
reduction(+:ener_tot) -#elif defined(SMILEI_OPENACC_MODE) +#elif defined(SMILEI_ACCELERATOR_GPU_OACC) #pragma acc parallel deviceptr(weight_ptr, \ momentum_x, \ momentum_y, \ @@ -525,14 +525,14 @@ void DiagnosticScalar::compute( Patch *patch, int ) } else if( vecSpecies[ispec]->mass_ == 0 ) { // GPU mode -#ifdef SMILEI_ACCELERATOR_MODE +#ifdef SMILEI_ACCELERATOR_GPU #if defined( SMILEI_ACCELERATOR_GPU_OMP ) #pragma omp target teams distribute parallel for \ map(tofrom: density) \ is_device_ptr(weight_ptr) \ reduction(+:density) -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel deviceptr(weight_ptr) #pragma acc loop gang worker vector reduction(+:density) #endif @@ -548,7 +548,7 @@ void DiagnosticScalar::compute( Patch *patch, int ) momentum_y /* [istart:particle_number] */, \ momentum_z /* [istart:particle_number] */) \ reduction(+:ener_tot) -#elif defined(SMILEI_OPENACC_MODE) +#elif defined(SMILEI_ACCELERATOR_GPU_OACC) #pragma acc parallel deviceptr(weight_ptr, \ momentum_x, \ momentum_y, \ @@ -667,7 +667,7 @@ void DiagnosticScalar::compute( Patch *patch, int ) // total energy in current field double Uem = 0.; if( ! AM ) { -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) Uem = field->norm2OnDevice( EMfields->istart, EMfields->bufsize ); #else Uem = field->norm2( EMfields->istart, EMfields->bufsize ); @@ -751,7 +751,7 @@ void DiagnosticScalar::compute( Patch *patch, int ) j_max = iFieldStart[1]; k_max = iFieldStart[2]; -#if defined( SMILEI_ACCELERATOR_MODE) +#if defined( SMILEI_ACCELERATOR_GPU) // We use scalar rather than arrays because omp target // sometime fails to pass them to the device const unsigned int ixstart = iFieldStart[0]; @@ -776,7 +776,7 @@ void DiagnosticScalar::compute( Patch *patch, int ) map(tofrom: minval, maxval, i_min, i_max, j_min, j_max, k_min, k_max) \ map(to: ny, nz, ixstart, ixend, iystart, iyend, izstart, izend) //reduction(min:minval) -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present(field_data) //deviceptr( data_ ) #pragma acc loop gang worker vector collapse(3) #endif diff --git a/src/Diagnostic/DiagnosticTrack.cpp b/src/Diagnostic/DiagnosticTrack.cpp index 16ac325e9..583caab94 100755 --- a/src/Diagnostic/DiagnosticTrack.cpp +++ b/src/Diagnostic/DiagnosticTrack.cpp @@ -188,7 +188,7 @@ void DiagnosticTrack::setIDs( Patch *patch ) for( unsigned int iPart=0; iPartvecSpecies[species_index_]->particles->id( iPart ) = ++latest_Id; } -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) patch->vecSpecies[species_index_]->particles->initializeIDsOnDevice(); #endif } diff --git a/src/ElectroMagn/ElectroMagn.cpp b/src/ElectroMagn/ElectroMagn.cpp index 2c75bc6a4..02467ecd4 100755 --- a/src/ElectroMagn/ElectroMagn.cpp +++ b/src/ElectroMagn/ElectroMagn.cpp @@ -555,7 +555,7 @@ void ElectroMagn::applyAntenna( unsigned int iAntenna, double intensity ) //! Compute the total density and currents from species density and currents on Device //! This function is valid wathever the geometry // --------------------------------------------------------------------------------------------------------------------- -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) void ElectroMagn::computeTotalRhoJOnDevice() { @@ -577,7 +577,7 @@ void ElectroMagn::computeTotalRhoJOnDevice() double *const __restrict__ rhosp = rho_s[ispec] ? 
rho_s[ispec]->data() : nullptr; -#if defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present( \ Jxp[0:Jx_size], \ Jyp[0:Jy_size], \ @@ -594,7 +594,7 @@ void ElectroMagn::computeTotalRhoJOnDevice() #if defined( SMILEI_ACCELERATOR_GPU_OMP ) #pragma omp target #pragma omp teams distribute parallel for -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc loop gang worker vector #endif for( unsigned int i=0 ; i( Bx_ ); - Field1D *By1D = static_cast( By_ ); - Field1D *Bz1D = static_cast( Bz_ ); - Field1D *Bx1D_m = static_cast( Bx_m ); - Field1D *By1D_m = static_cast( By_m ); - Field1D *Bz1D_m = static_cast( Bz_m ); + const double *const __restrict__ Bx1D = Bx_->data(); + const double *const __restrict__ By1D = By_->data(); + const double *const __restrict__ Bz1D = Bz_->data(); + double *const __restrict__ Bx1D_m = Bx_m->data(); + double *const __restrict__ By1D_m = By_m->data(); + double *const __restrict__ Bz1D_m = Bz_m->data(); + const unsigned int nx_p = dimPrim[0]; + const unsigned int nx_d = dimDual[0]; + // for Bx^(p) - for( unsigned int i=0 ; isize(); + const int sizeofBy = By_->size(); + const int sizeofBz = Bz_->size(); + + #pragma acc parallel present(Bx1D[0:sizeofBx],Bx1D_m[0:sizeofBx]) + #pragma acc loop gang worker vector +#elif defined( SMILEI_ACCELERATOR_GPU_OMP ) + #pragma omp target + #pragma omp teams distribute parallel for //simd +#endif + for( unsigned int i=0 ; i( By_mBTIS3 ); - Field1D *Bz_oldBTIS3 = static_cast( Bz_mBTIS3 ); - - for( unsigned int i=0 ; idata(); + double *const Bz1D_oldBTIS3 = Bz_mBTIS3->data(); +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) + const int sizeofByBTIS3 = By_mBTIS3->size(); + const int sizeofBzBTIS3 = Bz_mBTIS3->size(); + #pragma acc parallel present(By1D_oldBTIS3[0:sizeofByBTIS3],By1D[0:sizeofBy],Bz1D_oldBTIS3[0:sizeofBzBTIS3],Bz1D[0:sizeofBz]) + #pragma acc loop gang vector +#elif defined( SMILEI_ACCELERATOR_GPU_OMP ) + #pragma omp target + #pragma omp teams distribute parallel for +#endif +#if !defined( SMILEI_ACCELERATOR_GPU ) + #pragma omp simd +#endif + for( unsigned int i=0 ; idata(); // Magnetic field Bx^(p,d) -#if defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) const int sizeofBx = Bx_->size(); const int sizeofBy = By_->size(); const int sizeofBz = Bz_->size(); @@ -1229,10 +1229,10 @@ void ElectroMagn2D::centerMagneticFields() #pragma omp teams distribute parallel for collapse( 2 ) #endif for( unsigned int x = 0; x < nx_p; ++x ) { -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc loop vector #endif -#if !defined( SMILEI_ACCELERATOR_MODE ) +#if !defined( SMILEI_ACCELERATOR_GPU ) #pragma omp simd #endif for( unsigned int y = 0; y < ny_d; ++y ) { @@ -1241,7 +1241,7 @@ void ElectroMagn2D::centerMagneticFields() } // Magnetic field By^(d,p) -#if defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present(By2D[0:sizeofBy],By2D_m[0:sizeofBy]) #pragma acc loop gang worker #elif defined( SMILEI_ACCELERATOR_GPU_OMP ) @@ -1249,10 +1249,10 @@ void ElectroMagn2D::centerMagneticFields() #pragma omp teams distribute parallel for collapse( 2 ) #endif for( unsigned int x = 0; x < ( nx_p + 1 ); ++x ) { -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc loop vector #endif -#if !defined( SMILEI_ACCELERATOR_MODE ) +#if !defined( SMILEI_ACCELERATOR_GPU ) #pragma omp simd #endif for( unsigned int y = 0; y < ny_p; ++y ) { @@ -1260,7 +1260,7 @@ void 
ElectroMagn2D::centerMagneticFields() } } // Magnetic field Bz^(d,d) -#if defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present(Bz2D[0:sizeofBz],Bz2D_m[0:sizeofBz]) #pragma acc loop gang worker #elif defined( SMILEI_ACCELERATOR_GPU_OMP ) @@ -1268,10 +1268,10 @@ void ElectroMagn2D::centerMagneticFields() #pragma omp teams distribute parallel for collapse( 2 ) #endif for( unsigned int x = 0; x < ( nx_p + 1 ); ++x ) { -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc loop vector #endif -#if !defined( SMILEI_ACCELERATOR_MODE ) +#if !defined( SMILEI_ACCELERATOR_GPU ) #pragma omp simd #endif for( unsigned int y = 0; y < ny_d; ++y ) { @@ -1282,7 +1282,7 @@ void ElectroMagn2D::centerMagneticFields() double *const By2D_oldBTIS3 = By_mBTIS3->data(); double *const Bz2D_oldBTIS3 = Bz_mBTIS3->data(); -#if defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) const int sizeofByBTIS3 = By_mBTIS3->size(); #pragma acc parallel present(By2D_oldBTIS3[0:sizeofByBTIS3],By2D[0:sizeofBy]) #pragma acc loop gang @@ -1291,17 +1291,17 @@ void ElectroMagn2D::centerMagneticFields() #pragma omp teams distribute parallel for collapse( 2 ) #endif for( unsigned int x = 0; x < ( nx_p - 1 ); ++x ) { -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc loop vector #endif -#if !defined( SMILEI_ACCELERATOR_MODE ) +#if !defined( SMILEI_ACCELERATOR_GPU ) #pragma omp simd #endif for( unsigned int y = 0; y < ny_p; ++y ) { By2D_oldBTIS3[x * ny_p + y] = ( By2D[(x+1) * ny_p + y] + By2D_oldBTIS3[x * ny_p + y] ) * 0.5; } } -#if defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) const int sizeofBzBTIS3 = Bz_mBTIS3->size(); #pragma acc parallel present(Bz2D_oldBTIS3[0:sizeofBz],Bz2D[0:sizeofBz]) #pragma acc loop gang @@ -1310,10 +1310,10 @@ void ElectroMagn2D::centerMagneticFields() #pragma omp teams distribute parallel for collapse( 2 ) #endif for( unsigned int x = 0; x < ( nx_p - 1 ); ++x ) { -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc loop vector #endif -#if !defined( SMILEI_ACCELERATOR_MODE ) +#if !defined( SMILEI_ACCELERATOR_GPU ) #pragma omp simd #endif for( unsigned int y = 0; y < ny_d; ++y ) { @@ -1392,7 +1392,7 @@ void ElectroMagn2D::computeTotalRhoJ() //END computeTotalRhoJ } -// #if defined( SMILEI_ACCELERATOR_MODE ) +// #if defined( SMILEI_ACCELERATOR_GPU ) // //! Method used to compute the total charge density and currents by summing over all species on Device // void ElectroMagn2D::computeTotalRhoJOnDevice() // { diff --git a/src/ElectroMagn/ElectroMagn2D.h b/src/ElectroMagn/ElectroMagn2D.h index aecb87ab8..d8cdfb031 100755 --- a/src/ElectroMagn/ElectroMagn2D.h +++ b/src/ElectroMagn/ElectroMagn2D.h @@ -115,7 +115,7 @@ class ElectroMagn2D : public ElectroMagn //! Method used to compute the total charge density and currents by summing over all species void computeTotalRhoJ() override; -// #if defined( SMILEI_ACCELERATOR_MODE ) +// #if defined( SMILEI_ACCELERATOR_GPU ) // //! 
Method used to compute the total charge density and currents by summing over all species on Device // void computeTotalRhoJOnDevice() override; // #endif diff --git a/src/ElectroMagn/ElectroMagn3D.cpp b/src/ElectroMagn/ElectroMagn3D.cpp index c8994d75c..41ba9cc58 100755 --- a/src/ElectroMagn/ElectroMagn3D.cpp +++ b/src/ElectroMagn/ElectroMagn3D.cpp @@ -4,7 +4,7 @@ #include #include -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #include #endif @@ -1207,7 +1207,7 @@ void ElectroMagn3D::centerMagneticFields() double *const __restrict__ Bz3D_m = Bz_m->data(); // Magnetic field Bx^(p,d,d) -#if defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) const int sizeofBx = Bx_->size(); const int sizeofBy = By_->size(); const int sizeofBz = Bz_->size(); @@ -1219,11 +1219,11 @@ void ElectroMagn3D::centerMagneticFields() #pragma omp teams distribute parallel for collapse( 3 ) #endif for( unsigned int i=0 ; idata(); double *const __restrict__ BzmBTIS3 = Bz_mBTIS3->data(); -#if defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) const int sizeofByBTIS3 = By_mBTIS3->size(); #pragma acc parallel present(By3D[0:sizeofBy],BymBTIS3[0:sizeofByBTIS3]) #pragma acc loop gang @@ -1305,11 +1305,11 @@ void ElectroMagn3D::centerMagneticFields() #pragma omp teams distribute parallel for collapse( 3 ) #endif for( unsigned int i=0 ; isize(); #pragma acc parallel present(Bz3D[0:sizeofBz],BzmBTIS3[0:sizeofBzBTIS3]) #pragma acc loop gang @@ -1332,11 +1332,11 @@ void ElectroMagn3D::centerMagneticFields() #pragma omp teams distribute parallel for collapse( 3 ) #endif for( unsigned int i=0 ; icopyFrom( Br_[imode] ); if (input[2] && copy[2]) Bt_m[imode]->copyFrom( Bt_[imode] ); } - ElectroMagnAM *emAM = static_cast( patch->EMfields ); + // ElectroMagnAM *emAM = static_cast( patch->EMfields ); //emAM->compute_B_m_fromEB(); } @@ -1900,7 +1900,7 @@ void ElectroMagnAM::compute_B_m_fromEB() { const unsigned int nl_p = dimPrim[0]; const unsigned int nl_d = dimDual[0]; - const unsigned int nr_p = dimPrim[1]; + // const unsigned int nr_p = dimPrim[1]; const unsigned int nr_d = dimDual[1]; const unsigned int Nmodes = El_.size(); diff --git a/src/ElectroMagn/ElectroMagnAM.h b/src/ElectroMagn/ElectroMagnAM.h index 979581b4c..cd3063113 100755 --- a/src/ElectroMagn/ElectroMagnAM.h +++ b/src/ElectroMagn/ElectroMagnAM.h @@ -157,7 +157,7 @@ class ElectroMagnAM : public ElectroMagn void computeTotalRhoJ() override; -// #if defined( SMILEI_ACCELERATOR_MODE ) +// #if defined( SMILEI_ACCELERATOR_GPU ) // //! Method used to compute the total charge density and currents by summing over all species on Device // void computeTotalRhoJOnDevice() override ; // #endif diff --git a/src/ElectroMagnBC/ElectroMagnBC1D_SM.cpp b/src/ElectroMagnBC/ElectroMagnBC1D_SM.cpp index d00c3cdb1..ff767bc12 100755 --- a/src/ElectroMagnBC/ElectroMagnBC1D_SM.cpp +++ b/src/ElectroMagnBC/ElectroMagnBC1D_SM.cpp @@ -17,23 +17,23 @@ ElectroMagnBC1D_SM::ElectroMagnBC1D_SM( Params ¶ms, Patch *patch, unsigned i : ElectroMagnBC1D( params, patch, i_boundary ) { // Parameters for the Silver-Mueller boundary conditions - Alpha = 2./( 1.+dt_ov_d[0] ); - Beta = ( dt_ov_d[0]-1. )/( 1.+dt_ov_d[0] ); - Gamma = 4./( 1.+dt_ov_d[0] ); + Alpha_ = 2. / ( 1. + dt_ov_d[0] ); + Beta_ = ( dt_ov_d[0] - 1. ) / ( 1. + dt_ov_d[0] ); + Gamma_ = 4. / ( 1. 
+ dt_ov_d[0] ); - By_val = 0.; - Bz_val = 0.; + By_val_ = 0.; + Bz_val_ = 0.; sign_ = (double) (i_boundary_ % 2) *2 - 1.; // -1 or 1 for min or max if( i_boundary == 0 ) { - iE = 0; - iB = 0; - iB_old = 1; + iE_ = 0; + iB_ = 0; + iB_old_ = 1; } else { - iE = n_p[0] - 1; - iB = n_d[0] - 1; - iB_old = iB - 1; + iE_ = n_p[0] - 1; + iB_ = n_d[0] - 1; + iB_old_ = iB_ - 1; } } @@ -50,15 +50,15 @@ void ElectroMagnBC1D_SM::save_fields( Field *my_field, Patch *patch ) if( i_boundary_ == 0 && patch->isXmin() ) { if( field1D->name=="By" ) { - By_val = ( *my_field )( 0 ); + By_val_ = ( *my_field )( 0 ); } else if( field1D->name=="Bz" ) { - Bz_val = ( *my_field )( 0 ); + Bz_val_ = ( *my_field )( 0 ); } } else if( i_boundary_ == 1 && patch->isXmax() ) { if( field1D->name=="By" ) { - By_val = ( *my_field )( field1D->dims()[0]-1 ); + By_val_ = ( *my_field )( field1D->dims()[0]-1 ); } else if( field1D->name=="Bz" ) { - Bz_val = ( *my_field )( field1D->dims()[0]-1 ); + Bz_val_ = ( *my_field )( field1D->dims()[0]-1 ); } } @@ -74,11 +74,17 @@ void ElectroMagnBC1D_SM::apply( ElectroMagn *EMfields, double time_dual, Patch * if( patch->isBoundary( i_boundary_ ) ) { //Field1D* Ex1D = static_cast(EMfields->Ex_); - Field1D *Ey1D = static_cast( EMfields->Ey_ ); + /*Field1D *Ey1D = static_cast( EMfields->Ey_ ); Field1D *Ez1D = static_cast( EMfields->Ez_ ); Field1D *By1D = static_cast( EMfields->By_ ); - Field1D *Bz1D = static_cast( EMfields->Bz_ ); + Field1D *Bz1D = static_cast( EMfields->Bz_ );*/ + const Field *E[3]{ EMfields->Ex_, EMfields->Ey_, EMfields->Ez_ }; + const Field *B[3]{ EMfields->Bx_, EMfields->By_, EMfields->Bz_ }; + const double *const __restrict__ E1 = E[1]->data_; + const double *const __restrict__ E2 = E[2]->data_; + double *const __restrict__ B1 = B[1]->data_; + double *const __restrict__ B2 = B[2]->data_; // Lasers double by = 0., bz = 0.; vector pos( 1 ); @@ -88,11 +94,25 @@ void ElectroMagnBC1D_SM::apply( ElectroMagn *EMfields, double time_dual, Patch * bz += vecLaser[ilaser]->getAmplitude1( pos, time_dual, 0, 0 ); } +#ifdef SMILEI_ACCELERATOR_GPU_OACC + const int sizeofE1 = E[1]->number_of_points_; + const int sizeofE2 = E[2]->number_of_points_; + const int sizeofB1 = B[1]->number_of_points_; + const int sizeofB2 = B[2]->number_of_points_; +#endif // Apply Silver-Mueller EM boundary condition at x=xmin or xmax - ( *By1D )( iB ) = -sign_*Alpha*( *Ez1D )( iE ) + Beta*( ( *By1D )( iB_old )-By_val ) + Gamma*by + By_val; - ( *Bz1D )( iB ) = sign_*Alpha*( *Ey1D )( iE ) + Beta*( ( *Bz1D )( iB_old )-Bz_val ) + Gamma*bz + Bz_val; - +#ifdef SMILEI_ACCELERATOR_GPU_OACC + #pragma acc parallel present(E1[0:sizeofE1],E2[0:sizeofE2],B1[0:sizeofB1],B2[0:sizeofB2]) +#elif defined( SMILEI_ACCELERATOR_GPU_OMP ) + #pragma omp target +#endif + { + //( *By1D )( iB_ ) = -sign_*Alpha_*( *Ez1D )( iE_ ) + Beta_*( ( *By1D )( iB_old_ )-By_val_ ) + Gamma_*by + By_val_; + //( *Bz1D )( iB_ ) = sign_*Alpha_*( *Ey1D )( iE_ ) + Beta_*( ( *Bz1D )( iB_old_ )-Bz_val_ ) + Gamma_*bz + Bz_val_; + B1[ iB_ ] = -sign_ * Alpha_ * E2[iE_] + Beta_ * ( B1[iB_old_] - By_val_) + Gamma_ * by + By_val_; + B2[ iB_ ] = sign_ * Alpha_ * E1[iE_] + Beta_ * ( B2[iB_old_] - Bz_val_) + Gamma_ * bz + Bz_val_; + } } } diff --git a/src/ElectroMagnBC/ElectroMagnBC1D_SM.h b/src/ElectroMagnBC/ElectroMagnBC1D_SM.h index ac17f856d..ccbc499c1 100755 --- a/src/ElectroMagnBC/ElectroMagnBC1D_SM.h +++ b/src/ElectroMagnBC/ElectroMagnBC1D_SM.h @@ -17,16 +17,16 @@ class ElectroMagnBC1D_SM : public ElectroMagnBC1D void save_fields( Field *, Patch *patch ) override; - 
double By_val, Bz_val; + double By_val_, Bz_val_; private: //! Constants used for the Silver-Mueller boundary conditions - double Alpha, Beta, Gamma; + double Alpha_, Beta_, Gamma_; //! Locations to apply the profile - unsigned int iE, iB, iB_old; + unsigned int iE_, iB_, iB_old_; int sign_; }; diff --git a/src/ElectroMagnBC/ElectroMagnBC2D_SM.cpp b/src/ElectroMagnBC/ElectroMagnBC2D_SM.cpp index 42ce8c381..2d257cbd5 100755 --- a/src/ElectroMagnBC/ElectroMagnBC2D_SM.cpp +++ b/src/ElectroMagnBC/ElectroMagnBC2D_SM.cpp @@ -68,9 +68,9 @@ ElectroMagnBC2D_SM::ElectroMagnBC2D_SM( Params ¶ms, Patch *patch, unsigned i ElectroMagnBC2D_SM::~ElectroMagnBC2D_SM() { - for (int i=0 ; inumber_of_points_; const int sizeofE1 = E[1]->number_of_points_; const int sizeofE2 = E[2]->number_of_points_; @@ -182,7 +182,7 @@ void ElectroMagnBC2D_SM::apply( ElectroMagn *EMfields, double time_dual, Patch * smilei::tools::gpu::HostDeviceMemoryManagement::DeviceAllocateAndCopyHostToDevice( db1, b1_size ); if( axis0_ == 0 ) { // for By^(d,p) -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc parallel present(E2[0:sizeofE2],B0[0:sizeofB0],B1[0:sizeofB1],B_ext1[0:B_ext_size1],B_ext0[0:B_ext_size0],db1[0:b1_size]) #pragma acc loop gang worker vector #elif defined( SMILEI_ACCELERATOR_GPU_OMP ) @@ -199,7 +199,7 @@ void ElectroMagnBC2D_SM::apply( ElectroMagn *EMfields, double time_dual, Patch * + B_ext1[j]; } } else { // for Bx^(p,d) -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc parallel present(E2[0:sizeofE2],B0[0:sizeofB0],B1[0:sizeofB1],B_ext1[0:B_ext_size1],B_ext0[0:B_ext_size0],db1[0:b1_size]) #pragma acc loop gang worker vector #elif defined( SMILEI_ACCELERATOR_GPU_OMP ) @@ -234,7 +234,7 @@ void ElectroMagnBC2D_SM::apply( ElectroMagn *EMfields, double time_dual, Patch * // for Bz^(d,d) if( axis0_ == 0 ) { -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc parallel present(E1[0:sizeofE1],B2[0:sizeofB2],B_ext2[0:B_ext_size2],db2[0:b2_size]) #pragma acc loop gang worker vector #elif defined( SMILEI_ACCELERATOR_GPU_OMP ) @@ -247,7 +247,7 @@ void ElectroMagnBC2D_SM::apply( ElectroMagn *EMfields, double time_dual, Patch * } } else { -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc parallel present(E0[0:sizeofE0],B2[0:sizeofB2],B_ext2[0:B_ext_size2],db2[0:b2_size]) #pragma acc loop gang worker vector #elif defined( SMILEI_ACCELERATOR_GPU_OMP ) diff --git a/src/ElectroMagnBC/ElectroMagnBC3D_SM.cpp b/src/ElectroMagnBC/ElectroMagnBC3D_SM.cpp index 3ae113e60..ba4e61b28 100755 --- a/src/ElectroMagnBC/ElectroMagnBC3D_SM.cpp +++ b/src/ElectroMagnBC/ElectroMagnBC3D_SM.cpp @@ -186,7 +186,7 @@ void ElectroMagnBC3D_SM::apply( ElectroMagn *EMfields, double time_dual, Patch * const int isBoundary2min = patch->isBoundary( axis2_, 0 ); const int isBoundary2max = patch->isBoundary( axis2_, 1 ); -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC const int sizeofE0 = E[axis0_]->number_of_points_; const int sizeofE1 = E[axis1_]->number_of_points_; const int sizeofE2 = E[axis2_]->number_of_points_; @@ -217,7 +217,7 @@ void ElectroMagnBC3D_SM::apply( ElectroMagn *EMfields, double time_dual, Patch * // B1 if( axis0_ == 0 ) { -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc parallel present(E2[0:sizeofE2],B0[0:sizeofB0],B1[0:sizeofB1],B_ext1[0:B_ext_size1],B_ext0[0:B_ext_size0],db1[0:b1_size]) #pragma acc loop gang #elif defined( SMILEI_ACCELERATOR_GPU_OMP ) @@ -225,7 +225,7 @@ void 
ElectroMagnBC3D_SM::apply( ElectroMagn *EMfields, double time_dual, Patch * #pragma omp teams distribute parallel for collapse( 2 ) #endif for( unsigned int j=isBoundary1min; j( fields->Ex_ ); Field1D *Ey1D = static_cast( fields->Ey_ ); Field1D *Ez1D = static_cast( fields->Ez_ ); - Field1D *Bx1D = static_cast( fields->Bx_ ); + // Field1D *Bx1D = static_cast( fields->Bx_ ); Field1D *By1D = static_cast( fields->By_ ); Field1D *Bz1D = static_cast( fields->Bz_ ); Field1D *Jx1D = static_cast( fields->Jx_ ); diff --git a/src/ElectroMagnSolver/MA_Solver1D_norm.cpp b/src/ElectroMagnSolver/MA_Solver1D_norm.cpp index 7e04123f4..803ffc6cb 100755 --- a/src/ElectroMagnSolver/MA_Solver1D_norm.cpp +++ b/src/ElectroMagnSolver/MA_Solver1D_norm.cpp @@ -17,26 +17,56 @@ void MA_Solver1D_norm::operator()( ElectroMagn *fields ) { const unsigned int nx_p = fields->dimPrim[0]; const unsigned int nx_d = fields->dimDual[0]; - Field1D *Ex1D = static_cast( fields->Ex_ ); - Field1D *Ey1D = static_cast( fields->Ey_ ); - Field1D *Ez1D = static_cast( fields->Ez_ ); - Field1D *By1D = static_cast( fields->By_ ); - Field1D *Bz1D = static_cast( fields->Bz_ ); - Field1D *Jx1D = static_cast( fields->Jx_ ); - Field1D *Jy1D = static_cast( fields->Jy_ ); - Field1D *Jz1D = static_cast( fields->Jz_ ); - + + double *const __restrict__ Ex1D = fields->Ex_->data(); // [x] : dual in x primal in y,z + double *const __restrict__ Ey1D = fields->Ey_->data(); // [x] : dual in y primal in x,z + double *const __restrict__ Ez1D = fields->Ez_->data(); // [x] : dual in z primal in x,y + //const double *const __restrict__ Bx1D = fields->Bx_->data(); // [x] : dual in y,z primal in x + const double *const __restrict__ By1D = fields->By_->data(); // [x] : dual in x,z primal in y + const double *const __restrict__ Bz1D = fields->Bz_->data(); // [x] : dual in x,y primal in z + const double *const __restrict__ Jx1D = fields->Jx_->data(); // [x] : dual in x primal in y,z + const double *const __restrict__ Jy1D = fields->Jy_->data(); // [x] : dual in y primal in x,z + const double *const __restrict__ Jz1D = fields->Jz_->data(); // [x] : dual in z primal in x,y + // -------------------- // Solve Maxwell-Ampere // -------------------- // Calculate the electrostatic field ex on the dual grid - for( unsigned int ix=0 ; ixEx_->number_of_points_; + const int sizeofEy = fields->Ey_->number_of_points_; + const int sizeofEz = fields->Ez_->number_of_points_; + //const int sizeofBx = fields->Bx_->number_of_points_; + const int sizeofBy = fields->By_->number_of_points_; + const int sizeofBz = fields->Bz_->number_of_points_; + #pragma acc parallel present( Ex1D[0:sizeofEx], Jx1D[0:sizeofEx] ) + #pragma acc loop gang worker vector +#elif defined( SMILEI_ACCELERATOR_GPU_OMP ) + #pragma omp target + #pragma omp teams distribute parallel for +#endif +#if !defined( SMILEI_ACCELERATOR_GPU ) + #pragma omp simd +#endif + for( unsigned int ix=0 ; ixEx_->number_of_points_; const int sizeofEy = fields->Ey_->number_of_points_; const int sizeofEz = fields->Ez_->number_of_points_; @@ -52,10 +52,10 @@ void MA_Solver2D_norm::operator()( ElectroMagn *fields ) #pragma omp teams distribute parallel for collapse( 2 ) #endif for( unsigned int x = 0; x < nx_d; ++x ) { -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc loop worker #endif -#if !defined( SMILEI_ACCELERATOR_MODE ) +#if !defined( SMILEI_ACCELERATOR_GPU ) #pragma omp simd #endif for( unsigned int y = 0; y < ny_p; ++y ) { @@ -64,7 +64,7 @@ void MA_Solver2D_norm::operator()( ElectroMagn *fields ) } // 
Electric field Ey^(p,d) -#if defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present( Ey2D[0:sizeofEy], Jy2D[0:sizeofEy], Bz2D[0:sizeofBz] ) #pragma acc loop gang #elif defined( SMILEI_ACCELERATOR_GPU_OMP ) @@ -72,10 +72,10 @@ void MA_Solver2D_norm::operator()( ElectroMagn *fields ) #pragma omp teams distribute parallel for collapse( 2 ) #endif for( unsigned int x = 0; x < nx_p; ++x ) { -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc loop worker #endif -#if !defined( SMILEI_ACCELERATOR_MODE ) +#if !defined( SMILEI_ACCELERATOR_GPU ) #pragma omp simd #endif for( unsigned int y = 0; y < ny_d; ++y ) { @@ -84,7 +84,7 @@ void MA_Solver2D_norm::operator()( ElectroMagn *fields ) } // Electric field Ez^(p,p) -#if defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present( Ez2D[0:sizeofEz], Jz2D[0:sizeofEz], Bx2D[0:sizeofBx], By2D[0:sizeofBy] ) #pragma acc loop gang #elif defined( SMILEI_ACCELERATOR_GPU_OMP ) @@ -92,10 +92,10 @@ void MA_Solver2D_norm::operator()( ElectroMagn *fields ) #pragma omp teams distribute parallel for collapse( 2 ) #endif for( unsigned int x = 0; x < nx_p; ++x ) { -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc loop worker #endif -#if !defined( SMILEI_ACCELERATOR_MODE ) +#if !defined( SMILEI_ACCELERATOR_GPU ) #pragma omp simd #endif for( unsigned int y = 0; y < ny_p; ++y ) { diff --git a/src/ElectroMagnSolver/MA_Solver3D_norm.cpp b/src/ElectroMagnSolver/MA_Solver3D_norm.cpp index 9b2a089cc..7ffea26c0 100755 --- a/src/ElectroMagnSolver/MA_Solver3D_norm.cpp +++ b/src/ElectroMagnSolver/MA_Solver3D_norm.cpp @@ -35,7 +35,7 @@ void MA_Solver3D_norm::operator()( ElectroMagn *fields ) const unsigned int nz_d = fields->dimDual[2]; // Electric field Ex^(d,p,p) -#if defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) const int sizeofEx = fields->Ex_->number_of_points_; const int sizeofEy = fields->Ey_->number_of_points_; const int sizeofEz = fields->Ez_->number_of_points_; @@ -50,11 +50,11 @@ void MA_Solver3D_norm::operator()( ElectroMagn *fields ) #pragma omp teams distribute parallel for collapse( 3 ) #endif for( unsigned int i=0 ; idimPrim[0]; const unsigned int nx_d = fields->dimDual[0]; - // Static-cast of the fields - Field1D* Ey1D; - Field1D* Ez1D; - if (isEFilterApplied) { - Ey1D = static_cast(fields->filter_->Ey_[0]); - Ez1D = static_cast(fields->filter_->Ez_[0]); - } else { - Ey1D = static_cast(fields->Ey_); - Ez1D = static_cast(fields->Ez_); - } - Field1D *By1D = static_cast( fields->By_ ); - Field1D *Bz1D = static_cast( fields->Bz_ ); + const double *const __restrict__ Ey1D = isEFilterApplied ? fields->filter_->Ey_[0]->data() : + fields->Ey_->data(); // [ix] : dual in y primal in x,z + const double *const __restrict__ Ez1D = isEFilterApplied ? 
fields->filter_->Ez_[0]->data() : + fields->Ez_->data();// [ix] : dual in z primal in x,y + + double *const __restrict__ By1D = fields->By_->data();// [ix] : dual in x,z primal in y + double *const __restrict__ Bz1D = fields->Bz_->data();// [ix] : dual in x,y primal in z + // --------------------- // Solve Maxwell-Faraday // --------------------- // NB: bx is given in 1d and defined when initializing the fields (here put to 0) // Transverse fields by & bz are defined on the dual grid - //for (unsigned int ix=1 ; ixEy_->number_of_points_; + const int sizeofEz = fields->Ez_->number_of_points_; + const int sizeofBy = fields->By_->number_of_points_; + const int sizeofBz = fields->Bz_->number_of_points_; + #pragma acc parallel present( By1D[0:sizeofBy], Bz1D[0:sizeofBz],Ey1D[0:sizeofEy],Ez1D[0:sizeofEz] ) + #pragma acc loop gang vector +#elif defined( SMILEI_ACCELERATOR_GPU_OMP ) + #pragma omp target + #pragma omp teams distribute parallel for +#endif +#if !defined( SMILEI_ACCELERATOR_GPU ) + #pragma omp simd +#endif for( unsigned int ix=1 ; ixBz_->data(); // [x * ny_d + y] : dual in x,y primal in z // Magnetic field Bx^(p,d) -#if defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) const int sizeofEx = fields->Ex_->number_of_points_; const int sizeofEy = fields->Ey_->number_of_points_; const int sizeofEz = fields->Ez_->number_of_points_; @@ -48,10 +48,10 @@ void MF_Solver2D_Yee::operator()( ElectroMagn *fields ) #pragma omp teams distribute parallel for collapse( 2 ) #endif for( unsigned int x = 0; x < nx_d - 1; ++x ) { -#if !defined( SMILEI_ACCELERATOR_MODE ) +#if !defined( SMILEI_ACCELERATOR_GPU ) #pragma omp simd #endif -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc loop vector #endif for( unsigned int y = 1; y < ny_d - 1; ++y ) { @@ -59,7 +59,7 @@ void MF_Solver2D_Yee::operator()( ElectroMagn *fields ) } } // Magnetic field By^(d,p) -#if defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present( By2D[0:sizeofBy], Ez2D[0:sizeofEz] ) #pragma acc loop gang #elif defined( SMILEI_ACCELERATOR_GPU_OMP ) @@ -67,10 +67,10 @@ void MF_Solver2D_Yee::operator()( ElectroMagn *fields ) #pragma omp teams distribute parallel for collapse( 2 ) #endif for( unsigned int x = 1; x < nx_d - 1; ++x ) { -#if !defined( SMILEI_ACCELERATOR_MODE ) +#if !defined( SMILEI_ACCELERATOR_GPU ) #pragma omp simd #endif -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc loop vector #endif for( unsigned int y = 0; y < ny_p; ++y ) { @@ -79,7 +79,7 @@ void MF_Solver2D_Yee::operator()( ElectroMagn *fields ) } // Magnetic field Bz^(d,d) -#if defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present( Bz2D[0:sizeofBy], Ex2D[0:sizeofEx], Ey2D[0:sizeofEz] ) #pragma acc loop gang #elif defined( SMILEI_ACCELERATOR_GPU_OMP ) @@ -87,10 +87,10 @@ void MF_Solver2D_Yee::operator()( ElectroMagn *fields ) #pragma omp teams distribute parallel for collapse( 2 ) #endif for( unsigned int x = 1; x < nx_d - 1; ++x ) { -#if !defined( SMILEI_ACCELERATOR_MODE ) +#if !defined( SMILEI_ACCELERATOR_GPU ) #pragma omp simd #endif -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc loop vector #endif for( unsigned int y = 1; y < ny_d - 1; ++y ) { diff --git a/src/ElectroMagnSolver/MF_Solver3D_Yee.cpp b/src/ElectroMagnSolver/MF_Solver3D_Yee.cpp index 5930af3e1..f70159699 100755 --- a/src/ElectroMagnSolver/MF_Solver3D_Yee.cpp +++ b/src/ElectroMagnSolver/MF_Solver3D_Yee.cpp 
@@ -34,7 +34,7 @@ void MF_Solver3D_Yee::operator()( ElectroMagn *fields ) const double * __restrict__ Ez3D = isEFilterApplied ? fields->filter_->Ez_[0]->data() : fields->Ez_->data(); // Magnetic field Bx^(p,d,d) -#if defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) const int sizeofEx = fields->Ex_->number_of_points_; const int sizeofEy = fields->Ey_->number_of_points_; const int sizeofEz = fields->Ez_->number_of_points_; @@ -49,11 +49,11 @@ void MF_Solver3D_Yee::operator()( ElectroMagn *fields ) #pragma omp teams distribute parallel for collapse( 3 ) #endif for( unsigned int i=0 ; i dA_over_dx_fdtd = ( ( *A_n_pml )( i+1, j )-( *A_n_pml )( i-1, j ) )/(2.*dx) ; - std::complex dA_over_dx = dA_over_dx_fdtd - + i1*k0*( *A_n_pml )( i, j ) ; + // std::complex dA_over_dx = dA_over_dx_fdtd + // + i1*k0*( *A_n_pml )( i, j ) ; // d2A/dx^2 = d2A/dx^2 + 2ik0 dA/dx - k0^2 A std::complex d2A_over_dx2_fdtd = ( ( *A_n_pml )( i-1, j )-2.*( *A_n_pml )( i, j )+( *A_n_pml )( i+1, j ) )/(dx*dx) ; std::complex d2A_over_dx2 = d2A_over_dx2_fdtd @@ -590,8 +590,8 @@ void PML_Solver2D_Envelope::compute_A_from_G( LaserEnvelope *envelope, int iDim, // ---- // dA/dx = dA/dx + ik0 A std::complex dA_over_dx_fdtd = ( ( *A_n_pml )( i+1, j )-( *A_n_pml )( i-1, j ) )/(2.*dx) ; - std::complex dA_over_dx = dA_over_dx_fdtd - + i1*k0*( *A_n_pml )( i, j ) ; + // std::complex dA_over_dx = dA_over_dx_fdtd + // + i1*k0*( *A_n_pml )( i, j ) ; // d2A/dx^2 = d2A/dx^2 + 2ik0 dA/dx - k0^2 A std::complex d2A_over_dx2_fdtd = ( ( *A_n_pml )( i-1, j )-2.*( *A_n_pml )( i, j )+( *A_n_pml )( i+1, j ) )/(dx*dx) ; std::complex d2A_over_dx2 = d2A_over_dx2_fdtd diff --git a/src/ElectroMagnSolver/PML_SolverAM_Envelope.cpp b/src/ElectroMagnSolver/PML_SolverAM_Envelope.cpp index 7e4e740c7..d8c65645a 100644 --- a/src/ElectroMagnSolver/PML_SolverAM_Envelope.cpp +++ b/src/ElectroMagnSolver/PML_SolverAM_Envelope.cpp @@ -395,7 +395,6 @@ void PML_SolverAM_Envelope::compute_A_from_G( LaserEnvelope *envelope, int iDim, double k0 = 1.; // laser wavenumber std::complex source_term_x ; std::complex source_term_y ; - double mpml_ratio = 0.00; if (iDim == 0) { for( unsigned int k=0 ; k<1 ; k++ ) { @@ -405,7 +404,7 @@ void PML_SolverAM_Envelope::compute_A_from_G( LaserEnvelope *envelope, int iDim, // dA/dx = dA/dx + ik0 A // r dA/dx = r dA/dx + ik0 rA <=> dG/dx = dG/dx + ik0 G std::complex dG_over_dx_fdtd = ( ( *G_n_pml )( i+1, j )-( *G_n_pml )( i-1, j ) )/(2.*dl) ; - std::complex dG_over_dx = dG_over_dx_fdtd + i1*k0*( *G_n_pml )( i, j ) ; + // std::complex dG_over_dx = dG_over_dx_fdtd + i1*k0*( *G_n_pml )( i, j ) ; // d2A/dx^2 = d2A/dx^2 + 2ik0 dA/dx - k0^2 A // r d2A/dx^2 = r d2A/dx^2 + r 2ik0 dA/dx - r k0^2 A <=> d2G/dx^2 = d2G/dx^2 + 2ik0 dG/dx - k0^2 G std::complex d2G_over_dx2_fdtd = ( ( *G_n_pml )( i-1, j )-2.*( *G_n_pml )( i, j )+( *G_n_pml )( i+1, j ) )/(dl*dl) ; @@ -494,7 +493,7 @@ void PML_SolverAM_Envelope::compute_A_from_G( LaserEnvelope *envelope, int iDim, // dA/dx = dA/dx + ik0 A // r dA/dx = r dA/dx + ik0 rA <=> dG/dx = dG/dx + ik0 G std::complex dA_over_dx_fdtd = ( ( *A_n_pml )( i+1, j )-( *A_n_pml )( i-1, j ) )/(2.*dl) ; - std::complex dA_over_dx = dA_over_dx_fdtd + i1*k0*( *A_n_pml )( i, j ) ; + // std::complex dA_over_dx = dA_over_dx_fdtd + i1*k0*( *A_n_pml )( i, j ) ; // d2A/dx^2 = d2A/dx^2 + 2ik0 dA/dx - k0^2 A // r d2A/dx^2 = r d2A/dx^2 + r 2ik0 dA/dx - r k0^2 A <=> d2G/dx^2 = d2G/dx^2 + 2ik0 dG/dx - k0^2 G std::complex d2A_over_dx2_fdtd = ( ( *A_n_pml )( i-1, j )-2.*( *A_n_pml )( i, j )+( *A_n_pml )( i+1, j ) 
)/(dl*dl) ; @@ -635,8 +634,8 @@ void PML_SolverAM_Envelope::compute_A_from_G( LaserEnvelope *envelope, int iDim, for( unsigned int j=solvermin ; j < solvermax ; j++ ) { // y loop // r dA/dx = r dA/dx + ik0 rA <=> dG/dx = dG/dx + ik0 G std::complex dG_over_dx_fdtd = ( ( *G_n_pml )( i+1, j )-( *G_n_pml )( i-1, j ) )/(2.*dl) ; - std::complex dG_over_dx = dG_over_dx_fdtd - + i1*k0*( *G_n_pml )( i, j ) ; + // std::complex dG_over_dx = dG_over_dx_fdtd + // + i1*k0*( *G_n_pml )( i, j ) ; // r d2A/dx^2 = r d2A/dx^2 + r 2ik0 dA/dx - r k0^2 A <=> d2G/dx^2 = d2G/dx^2 + 2ik0 dG/dx - k0^2 G std::complex d2G_over_dx2_fdtd = ( ( *G_n_pml )( i-1, j )-2.*( *G_n_pml )( i, j )+( *G_n_pml )( i+1, j ) )/(dl*dl) ; std::complex d2G_over_dx2 = d2G_over_dx2_fdtd diff --git a/src/ElectroMagnSolver/PML_SolverAM_EnvelopeReducedDispersion.cpp b/src/ElectroMagnSolver/PML_SolverAM_EnvelopeReducedDispersion.cpp index 771f12e37..c2a5c4087 100644 --- a/src/ElectroMagnSolver/PML_SolverAM_EnvelopeReducedDispersion.cpp +++ b/src/ElectroMagnSolver/PML_SolverAM_EnvelopeReducedDispersion.cpp @@ -400,7 +400,6 @@ void PML_SolverAM_EnvelopeReducedDispersion::compute_A_from_G( LaserEnvelope *en double k0 = 1.; // laser wavenumber std::complex source_term_x ; std::complex source_term_y ; - double mpml_ratio = 0.00; if (iDim == 0) { for( unsigned int k=0 ; k<1 ; k++ ) { @@ -410,7 +409,7 @@ void PML_SolverAM_EnvelopeReducedDispersion::compute_A_from_G( LaserEnvelope *en // dA/dx = dA/dx + ik0 A // r dA/dx = r dA/dx + ik0 rA <=> dG/dx = dG/dx + ik0 G std::complex dG_over_dx_fdtd = (1.+delta)*( ( *G_n_pml )( i+1, j )-( *G_n_pml )( i-1, j ) )/(2.*dl) - delta*( ( *G_n_pml )( i+2, j )-( *G_n_pml )( i-2, j ) )/(4.*dl) ; - std::complex dG_over_dx = dG_over_dx_fdtd + i1*k0*( *G_n_pml )( i, j ) ; + // std::complex dG_over_dx = dG_over_dx_fdtd + i1*k0*( *G_n_pml )( i, j ) ; // d2A/dx^2 = d2A/dx^2 + 2ik0 dA/dx - k0^2 A // r d2A/dx^2 = r d2A/dx^2 + r 2ik0 dA/dx - r k0^2 A <=> d2G/dx^2 = d2G/dx^2 + 2ik0 dG/dx - k0^2 G std::complex d2G_over_dx2_fdtd = (1.+delta)*( ( *G_n_pml )( i-1, j )-2.*( *G_n_pml )( i, j )+( *G_n_pml )( i+1, j ) )/(dl*dl)-delta*( ( *G_n_pml )( i-2, j )-2.*( *G_n_pml )( i, j )+( *G_n_pml )( i+2, j ) )/(4.*dl*dl) ; @@ -490,7 +489,7 @@ void PML_SolverAM_EnvelopeReducedDispersion::compute_A_from_G( LaserEnvelope *en for( unsigned int i=solvermin ; i dA_over_dx_fdtd = (1.+delta)*( ( *A_n_pml )( i+1, j )-( *A_n_pml )( i-1, j ) )/(2.*dl) - delta*( ( *A_n_pml )( i+2, j )-( *A_n_pml )( i-2, j ) )/(4.*dl) ; - std::complex dA_over_dx = dA_over_dx_fdtd + i1*k0*( *A_n_pml )( i, j ) ; + // std::complex dA_over_dx = dA_over_dx_fdtd + i1*k0*( *A_n_pml )( i, j ) ; // d2A/dx^2 = d2A/dx^2 + 2ik0 dA/dx - k0^2 A // r d2A/dx^2 = r d2A/dx^2 + r 2ik0 dA/dx - r k0^2 A <=> d2G/dx^2 = d2G/dx^2 + 2ik0 dG/dx - k0^2 G std::complex d2A_over_dx2_fdtd = (1.+delta)*( ( *A_n_pml )( i-1, j )-2.*( *A_n_pml )( i, j )+( *A_n_pml )( i+1, j ) )/(dl*dl)-delta*( ( *A_n_pml )( i-2, j )-2.*( *A_n_pml )( i, j )+( *A_n_pml )( i+2, j ) )/(4.*dl*dl) ; @@ -591,7 +590,7 @@ void PML_SolverAM_EnvelopeReducedDispersion::compute_A_from_G( LaserEnvelope *en for( unsigned int i=2 ; i dG_over_dx_fdtd = (1.+delta)*( ( *G_n_pml )( i+1, j )-( *G_n_pml )( i-1, j ) )/(2.*dl) - delta*( ( *G_n_pml )( i+2, j )-( *G_n_pml )( i-2, j ) )/(4.*dl) ; - std::complex dG_over_dx = dG_over_dx_fdtd + i1*k0*( *G_n_pml )( i, j ) ; + // std::complex dG_over_dx = dG_over_dx_fdtd + i1*k0*( *G_n_pml )( i, j ) ; // d2A/dx^2 = d2A/dx^2 + 2ik0 dA/dx - k0^2 A // r d2A/dx^2 = r d2A/dx^2 + r 2ik0 dA/dx - r k0^2 A 
<=> d2G/dx^2 = d2G/dx^2 + 2ik0 dG/dx - k0^2 G std::complex d2G_over_dx2_fdtd = (1.+delta)*( ( *G_n_pml )( i-1, j )-2.*( *G_n_pml )( i, j )+( *G_n_pml )( i+1, j ) )/(dl*dl)-delta*( ( *G_n_pml )( i-2, j )-2.*( *G_n_pml )( i, j )+( *G_n_pml )( i+2, j ) )/(4.*dl*dl) ; diff --git a/src/Field/Field.cpp b/src/Field/Field.cpp index 19c820d1d..0d8427f1e 100644 --- a/src/Field/Field.cpp +++ b/src/Field/Field.cpp @@ -5,14 +5,14 @@ void Field::put_to( double val ) { SMILEI_ASSERT( data_ != nullptr ); -#if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined( SMILEI_ACCELERATOR_GPU_OACC ) const bool is_hostptr_mapped_on_device = smilei::tools::gpu::HostDeviceMemoryManagement::IsHostPointerMappedOnDevice( data_ ); #endif // NVCC's OpenACC needs that redundant pointer value double* an_other_data_pointer = data_; -#if defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) // Test if data exists on GPU, put_to can be used on CPU and GPU during a simulation #pragma acc parallel present( an_other_data_pointer [0:size()] ) if( is_hostptr_mapped_on_device ) #pragma acc loop gang worker vector @@ -25,7 +25,7 @@ void Field::put_to( double val ) } } -#if defined(SMILEI_ACCELERATOR_MODE) +#if defined(SMILEI_ACCELERATOR_GPU) //! copy the field array from Host to Device void Field::copyFromHostToDevice() { diff --git a/src/Field/Field.h b/src/Field/Field.h index 669106245..563705ab1 100755 --- a/src/Field/Field.h +++ b/src/Field/Field.h @@ -188,7 +188,7 @@ class Field virtual double norm2( unsigned int istart[3][2], unsigned int bufsize[3][2] ) = 0; -#if defined(SMILEI_ACCELERATOR_MODE) +#if defined(SMILEI_ACCELERATOR_GPU) //! Compute the norm2OnDevice of the field virtual double norm2OnDevice( unsigned int istart[3][2], unsigned int bufsize[3][2] ) = 0; #endif @@ -234,7 +234,7 @@ class Field return sum; } -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) inline double __attribute__((always_inline)) normOnDevice() { @@ -245,7 +245,7 @@ class Field #pragma omp target teams distribute parallel for \ map(tofrom: sum) map(to: number_of_points_) \ reduction(+:sum) -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present(field) //deviceptr( data_ ) #pragma acc loop gang worker vector reduction(+:sum) #endif @@ -279,7 +279,7 @@ class Field virtual void extract_fields_sum ( int iDim, int iNeighbor, int ghost_size ) = 0; virtual void inject_fields_sum ( int iDim, int iNeighbor, int ghost_size ) = 0; -#if defined(SMILEI_ACCELERATOR_MODE) +#if defined(SMILEI_ACCELERATOR_GPU) //! copy the field from Host to Device void copyFromHostToDevice(); diff --git a/src/Field/Field1D.cpp b/src/Field/Field1D.cpp index d0fa18b2f..194660ce6 100755 --- a/src/Field/Field1D.cpp +++ b/src/Field/Field1D.cpp @@ -188,12 +188,37 @@ double Field1D::norm2( unsigned int istart[3][2], unsigned int bufsize[3][2] ) return nrj; } - //! Perform the norm2 on Device -#if defined(SMILEI_ACCELERATOR_MODE) +#if defined(SMILEI_ACCELERATOR_GPU) double Field1D::norm2OnDevice( unsigned int istart[3][2], unsigned int bufsize[3][2] ) { - ERROR("Not implemented"); + + double nrj( 0. 
); + + int idxlocalstart[1]; + int idxlocalend[1]; + idxlocalstart[0] = istart[0][isDual_[0]]; + idxlocalend[0] = istart[0][isDual_[0]]+bufsize[0][isDual_[0]]; + + const double *const __restrict__ field = data(); + +#if defined( SMILEI_ACCELERATOR_GPU_OMP ) + #pragma omp target teams distribute parallel for\ + map(tofrom: nrj) \ + map(to: idxlocalstart[0]) \ + /* is_device_ptr( data_ )*/ \ + reduction(+:nrj) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) + #pragma acc parallel present(field) //deviceptr( data_ ) + #pragma acc loop gang worker vector reduction(+:nrj) +#endif + + for( unsigned int i=idxlocalstart[0] ; i< idxlocalend[0] ; i++) { + nrj += field[i]*field[i]; + } + + return nrj; + } #endif @@ -246,15 +271,23 @@ void Field1D::create_sub_fields ( int iDim, int iNeighbor, int ghost_size ) if ( sendFields_[iDim*2+iNeighbor] == NULL ) { sendFields_[iDim*2+iNeighbor] = new Field1D(size); recvFields_[iDim*2+iNeighbor] = new Field1D(size); +#if defined( SMILEI_ACCELERATOR_GPU ) + if( ( name[0] == 'B' ) || ( name[0] == 'J' || name[0] == 'R' ) ) { + sendFields_[iDim * 2 + iNeighbor]->allocateAndCopyFromHostToDevice(); + recvFields_[iDim * 2 + iNeighbor]->allocateAndCopyFromHostToDevice(); + } +#endif } else if( ghost_size != (int) sendFields_[iDim*2+iNeighbor]->dims_[iDim] ) { +#if defined( SMILEI_ACCELERATOR_GPU ) + ERROR( "To Do GPU : envelope" ); +#endif delete sendFields_[iDim*2+iNeighbor]; sendFields_[iDim*2+iNeighbor] = new Field1D(size); delete recvFields_[iDim*2+iNeighbor]; recvFields_[iDim*2+iNeighbor] = new Field1D(size); } } - void Field1D::extract_fields_exch( int iDim, int iNeighbor, int ghost_size ) { std::vector size = dims_; @@ -267,13 +300,30 @@ void Field1D::extract_fields_exch( int iDim, int iNeighbor, int ghost_size ) unsigned int NX = size[0]; - double* sub = sendFields_[iDim*2+iNeighbor]->data_; - double* field = data_; + double *__restrict__ sub = sendFields_[iDim*2+iNeighbor]->data_; + const double*__restrict__ field = data_; + +#if defined( SMILEI_ACCELERATOR_GPU_OMP ) + // At initialization, this data is NOT on the GPU + const bool should_manipulate_gpu_memory = name[0] == 'B' && + smilei::tools::gpu::HostDeviceMemoryManagement::IsHostPointerMappedOnDevice( sub ); + SMILEI_ASSERT( smilei::tools::gpu::HostDeviceMemoryManagement::IsHostPointerMappedOnDevice( field ) == + smilei::tools::gpu::HostDeviceMemoryManagement::IsHostPointerMappedOnDevice( sub ) ); + const unsigned field_first = ix; + const unsigned field_last = ix + NX - 1; + #pragma omp target if( should_manipulate_gpu_memory ) + #pragma omp teams distribute parallel for +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) + const int subSize = sendFields_[iDim*2+iNeighbor]->size(); + const int fSize = number_of_points_; + bool fieldName( (name.substr(0,1) == "B") ); + #pragma acc parallel present( field[0:fSize], sub[0:subSize] ) if (fieldName) + #pragma acc loop gang worker vector +#endif for( unsigned int i=0; i size = dims_; @@ -286,8 +336,25 @@ void Field1D::inject_fields_exch ( int iDim, int iNeighbor, int ghost_size ) unsigned int NX = size[0]; - double* sub = recvFields_[iDim*2+(iNeighbor+1)%2]->data_; - double* field = data_; + const double *__restrict__ sub = recvFields_[iDim*2+(iNeighbor+1)%2]->data_; + double *__restrict__ field = data_; + +#if defined( SMILEI_ACCELERATOR_GPU_OMP ) + // At initialization, this data is NOT on the GPU + const bool should_manipulate_gpu_memory = name[0] == 'B' && + smilei::tools::gpu::HostDeviceMemoryManagement::IsHostPointerMappedOnDevice( sub ); + const unsigned 
field_first = ix; + const unsigned field_last = ix + NX - 1; + #pragma omp target if( should_manipulate_gpu_memory ) \ + map( tofrom : field [field_first:field_last - field_first] ) + #pragma omp teams distribute parallel for +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) + int subSize = recvFields_[iDim*2+(iNeighbor+1)%2]->size(); + const int fSize = number_of_points_; + bool fieldName( name.substr(0,1) == "B" ); + #pragma acc parallel present( field[0:fSize], sub[0:subSize] ) if (fieldName) + #pragma acc loop gang worker vector +#endif for( unsigned int i=0; idata_; - double* field = data_; + double *__restrict__ sub = sendFields_[iDim*2+iNeighbor]->data_; + const double *__restrict__ field = data_; + +#if defined( SMILEI_ACCELERATOR_GPU_OMP ) + // At initialization, this data is NOT on the GPU + const bool should_manipulate_gpu_memory = (name[0] == 'J' || name[0] == 'R') && + smilei::tools::gpu::HostDeviceMemoryManagement::IsHostPointerMappedOnDevice( sub ); + const unsigned field_first = ix; + const unsigned field_last = ix + NX - 1; + #pragma omp target if( should_manipulate_gpu_memory ) \ + map( to : field [field_first:field_last - field_first] ) + #pragma omp teams distribute parallel for +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) + const int subSize = sendFields_[iDim*2+iNeighbor]->size(); + const int fSize = number_of_points_; + bool fieldName( ((name.substr(0,1) == "J") || (name.substr(0,1) == "R") ) && smilei::tools::gpu::HostDeviceMemoryManagement::IsHostPointerMappedOnDevice( sub )); + #pragma acc parallel copy(field[0:fSize]) present( sub[0:subSize] ) if (fieldName) + #pragma acc loop gang worker vector +#endif for( unsigned int i=0; i size = dims_; @@ -324,9 +407,27 @@ void Field1D::inject_fields_sum ( int iDim, int iNeighbor, int ghost_size ) unsigned int NX = size[0]; - double* sub = recvFields_[iDim*2+(iNeighbor+1)%2]->data_; - double* field = data_; + const double *__restrict__ sub = recvFields_[iDim*2+(iNeighbor+1)%2]->data_; + double *__restrict__ field = data_; + +#if defined( SMILEI_ACCELERATOR_GPU_OMP ) + // At initialization, this data is NOT on the GPU + const bool should_manipulate_gpu_memory = (name[0] == 'J' || name[0] == 'R') && + smilei::tools::gpu::HostDeviceMemoryManagement::IsHostPointerMappedOnDevice( sub ); + const unsigned field_first = ix; + const unsigned field_last = ix + NX - 1; + #pragma omp target if( should_manipulate_gpu_memory ) \ + map( tofrom : field [field_first:field_last - field_first] ) + #pragma omp teams distribute parallel for +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) + int subSize = recvFields_[iDim*2+(iNeighbor+1)%2]->size(); + int fSize = number_of_points_; + bool fieldName( name.substr(0,1) == "J" || name.substr(0,1) == "R"); + #pragma acc parallel copy(field[0:fSize]) present( sub[0:subSize] ) if (fieldName) + #pragma acc loop gang worker vector +#endif for( unsigned int i=0; iisOnDevice() ) { sendFields_[iside]->deleteOnDevice(); @@ -220,7 +220,7 @@ double Field2D::norm2( unsigned int istart[3][2], unsigned int bufsize[3][2] ) } //! 
Perform the norm2 on Device -#if defined(SMILEI_ACCELERATOR_MODE) +#if defined(SMILEI_ACCELERATOR_GPU) double Field2D::norm2OnDevice( unsigned int istart[3][2], unsigned int bufsize[3][2] ) { @@ -247,7 +247,7 @@ double Field2D::norm2OnDevice( unsigned int istart[3][2], unsigned int bufsize[3 map(to: ny, idxlocalstart[0], idxlocalstart[1], iystart, iyend) \ /* is_device_ptr( data_ )*/ \ reduction(+:nrj) -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present(field) //deviceptr( data_ ) #pragma acc loop gang worker vector collapse(2) reduction(+:nrj) #endif @@ -333,7 +333,7 @@ void Field2D::create_sub_fields( int iDim, int iNeighbor, int ghost_size ) sendFields_[iDim*2+iNeighbor] = new Field2D(size); recvFields_[iDim*2+iNeighbor] = new Field2D(size); -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) if( ( name[0] == 'B' ) || ( name[0] == 'J' || name[0] == 'R' ) ) { sendFields_[iDim * 2 + iNeighbor]->allocateAndCopyFromHostToDevice(); recvFields_[iDim * 2 + iNeighbor]->allocateAndCopyFromHostToDevice(); @@ -341,7 +341,7 @@ void Field2D::create_sub_fields( int iDim, int iNeighbor, int ghost_size ) #endif } else if ( ghost_size != (int)(sendFields_[iDim*2+iNeighbor]->dims_[iDim]) ) { -#if defined( SMILEI_OPENACC_MODE ) || defined( SMILEI_ACCELERATOR_GPU_OMP ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) || defined( SMILEI_ACCELERATOR_GPU_OMP ) ERROR( "To Do GPU : envelope" ); #endif delete sendFields_[iDim*2+iNeighbor]; @@ -381,7 +381,7 @@ void Field2D::extract_fields_exch( int iDim, int iNeighbor, int ghost_size ) #pragma omp target if( should_manipulate_gpu_memory ) #pragma omp teams distribute parallel for collapse( 2 ) -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) const int subSize = sendFields_[iDim*2+iNeighbor]->size(); const int fSize = number_of_points_; bool fieldName( (name.substr(0,1) == "B") ); @@ -389,7 +389,7 @@ void Field2D::extract_fields_exch( int iDim, int iNeighbor, int ghost_size ) #pragma acc loop gang #endif for( unsigned int i=0; isize(); const int fSize = number_of_points_; bool fieldName( name.substr(0,1) == "B" ); @@ -437,7 +437,7 @@ void Field2D::inject_fields_exch ( int iDim, int iNeighbor, int ghost_size ) #pragma acc loop gang #endif for( unsigned int i=0; isize(); const int fSize = number_of_points_; bool fieldName( ((name.substr(0,1) == "J") || (name.substr(0,1) == "R") ) && smilei::tools::gpu::HostDeviceMemoryManagement::IsHostPointerMappedOnDevice( sub )); @@ -486,7 +486,7 @@ void Field2D::extract_fields_sum ( int iDim, int iNeighbor, int ghost_size ) #pragma acc loop gang #endif for( unsigned int i=0; isize(); int fSize = number_of_points_; bool fieldName( name.substr(0,1) == "J" || name.substr(0,1) == "R"); @@ -535,7 +535,7 @@ void Field2D::inject_fields_sum ( int iDim, int iNeighbor, int ghost_size ) #pragma acc loop gang #endif for( unsigned int i=0; i #include -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #include #endif @@ -81,7 +81,7 @@ Field3D::~Field3D() for( unsigned int iside=0 ; isideisOnDevice() ) { @@ -102,7 +102,9 @@ Field3D::~Field3D() } } if( data_!=NULL ) { +#if defined(SMILEI_ACCELERATOR_GPU_OACC) #pragma acc exit data delete (data_[0:number_of_points_]) if (acc_deviceptr(data_) != NULL) +#endif delete [] data_; for( unsigned int i=0; idata_3D[i]; @@ -248,7 +250,7 @@ double Field3D::norm2( unsigned int istart[3][2], unsigned int bufsize[3][2] ) } // Perform the norm2 on Device -#if 
defined(SMILEI_ACCELERATOR_MODE) +#if defined(SMILEI_ACCELERATOR_GPU) double Field3D::norm2OnDevice( unsigned int istart[3][2], unsigned int bufsize[3][2] ) { double nrj( 0. ); @@ -277,7 +279,7 @@ double Field3D::norm2OnDevice( unsigned int istart[3][2], unsigned int bufsize[3 map(to: ny, nz, ixstart, ixend, iystart, iyend, izstart, izend) \ /*is_device_ptr( data_ ) */ \ reduction(+:nrj) -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present(field[0:number_of_points_]) //deviceptr( data_ ) #pragma acc loop gang worker vector collapse(3) reduction(+:nrj) #endif @@ -405,7 +407,7 @@ void Field3D::create_sub_fields ( int iDim, int iNeighbor, int ghost_size ) sendFields_[iDim*2+iNeighbor] = new Field3D(size); recvFields_[iDim*2+iNeighbor] = new Field3D(size); -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) if( ( name[0] == 'B' ) || ( name[0] == 'J' || name[0] == 'R' ) ) { @@ -427,7 +429,7 @@ void Field3D::create_sub_fields ( int iDim, int iNeighbor, int ghost_size ) } else if( ghost_size != (int) sendFields_[iDim*2+iNeighbor]->dims_[iDim] ) { -#if defined( SMILEI_OPENACC_MODE ) || defined( SMILEI_ACCELERATOR_GPU_OMP ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) || defined( SMILEI_ACCELERATOR_GPU_OMP ) ERROR( "To Do GPU : envelope" ); #endif delete sendFields_[iDim*2+iNeighbor]; @@ -463,7 +465,7 @@ void Field3D::extract_fields_exch( int iDim, int iNeighbor, int ghost_size ) #pragma omp target if( is_the_right_field ) #pragma omp teams distribute parallel for collapse( 3 ) -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) const int subSize = sendFields_[iDim*2+iNeighbor]->size(); const int fSize = number_of_points_; bool fieldName( (name.substr(0,1) == "B") ); @@ -471,11 +473,11 @@ void Field3D::extract_fields_exch( int iDim, int iNeighbor, int ghost_size ) #pragma acc loop gang #endif for( unsigned int i=0; i<(unsigned int)NX; i++ ) { -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc loop worker #endif for( unsigned int j=0; j<(unsigned int)NY; j++ ) { -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc loop vector #endif for( unsigned int k=0; k<(unsigned int)NZ; k++ ) { @@ -514,7 +516,7 @@ void Field3D::inject_fields_exch ( int iDim, int iNeighbor, int ghost_size ) map( tofrom \ : field [0:fSize] ) #pragma omp teams distribute parallel for collapse( 3 ) -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) int subSize = recvFields_[iDim*2+(iNeighbor+1)%2]->size(); const int fSize = number_of_points_; bool fieldName( name.substr(0,1) == "B" ); @@ -522,11 +524,11 @@ void Field3D::inject_fields_exch ( int iDim, int iNeighbor, int ghost_size ) #pragma acc loop gang #endif for( unsigned int i=0; i<(unsigned int)NX; i++ ) { -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc loop worker #endif for( unsigned int j=0; j<(unsigned int)NY; j++ ) { -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc loop vector #endif for( unsigned int k=0; k<(unsigned int)NZ; k++ ) { @@ -566,7 +568,7 @@ void Field3D::extract_fields_sum ( int iDim, int iNeighbor, int ghost_size ) map( to \ : field [0:fSize] ) #pragma omp teams distribute parallel for collapse( 3 ) -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) const int subSize = sendFields_[iDim*2+iNeighbor]->size(); const int fSize = number_of_points_; bool fieldName( (name.substr(0,1) 
== "J") || (name.substr(0,1) == "R")); @@ -575,11 +577,11 @@ void Field3D::extract_fields_sum ( int iDim, int iNeighbor, int ghost_size ) #pragma acc loop gang #endif for( unsigned int i=0; i<(unsigned int)NX; i++ ) { -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc loop worker #endif for( unsigned int j=0; j<(unsigned int)NY; j++ ) { -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc loop vector #endif for( unsigned int k=0; k<(unsigned int)NZ; k++ ) { @@ -618,7 +620,7 @@ void Field3D::inject_fields_sum ( int iDim, int iNeighbor, int ghost_size ) map( tofrom \ : field [0:fSize] ) #pragma omp teams distribute parallel for collapse( 3 ) -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) int subSize = recvFields_[iDim*2+(iNeighbor+1)%2]->size(); int fSize = number_of_points_; bool fieldName( name.substr(0,1) == "J" || name.substr(0,1) == "R"); @@ -627,11 +629,11 @@ void Field3D::inject_fields_sum ( int iDim, int iNeighbor, int ghost_size ) #pragma acc loop gang #endif for( unsigned int i=0; i<(unsigned int)NX; i++ ) { -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc loop worker #endif for( unsigned int j=0; j<(unsigned int)NY; j++ ) { -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc loop vector #endif for( unsigned int k=0; k<(unsigned int)NZ; k++ ) { diff --git a/src/Field/Field3D.h b/src/Field/Field3D.h index cc9524790..9f9ce4c9a 100755 --- a/src/Field/Field3D.h +++ b/src/Field/Field3D.h @@ -100,7 +100,7 @@ class Field3D : public Field virtual double norm2( unsigned int istart[3][2], unsigned int bufsize[3][2] ) override; //! Compute the norm2OnDevice of the field -#if defined(SMILEI_ACCELERATOR_MODE) +#if defined(SMILEI_ACCELERATOR_GPU) virtual double norm2OnDevice( unsigned int istart[3][2], unsigned int bufsize[3][2] ) override final; #endif diff --git a/src/Field/cField.h b/src/Field/cField.h index c37aa9514..d76de6ed7 100755 --- a/src/Field/cField.h +++ b/src/Field/cField.h @@ -63,7 +63,7 @@ class cField : public Field virtual double norm2( unsigned int istart[3][2], unsigned int bufsize[3][2] ) override = 0; //! Compute the norm2OnDevice of the field -#if defined(SMILEI_ACCELERATOR_MODE) +#if defined(SMILEI_ACCELERATOR_GPU) virtual double norm2OnDevice( unsigned int istart[3][2], unsigned int bufsize[3][2] ) = 0; #endif diff --git a/src/Field/cField1D.cpp b/src/Field/cField1D.cpp index 77b0c2685..6a79da95a 100755 --- a/src/Field/cField1D.cpp +++ b/src/Field/cField1D.cpp @@ -191,7 +191,7 @@ double cField1D::norm2( unsigned int istart[3][2], unsigned int bufsize[3][2] ) } //! Perform the norm2 on Device -#if defined(SMILEI_ACCELERATOR_MODE) +#if defined(SMILEI_ACCELERATOR_GPU) double cField1D::norm2OnDevice( unsigned int istart[3][2], unsigned int bufsize[3][2] ) { ERROR("Not implemented"); diff --git a/src/Field/cField1D.h b/src/Field/cField1D.h index 43f2030e3..27b15bfc1 100755 --- a/src/Field/cField1D.h +++ b/src/Field/cField1D.h @@ -94,7 +94,7 @@ class cField1D : public cField virtual double norm2( unsigned int istart[3][2], unsigned int bufsize[3][2] ) override; //! 
Compute the norm2OnDevice of the field -#if defined(SMILEI_ACCELERATOR_MODE) +#if defined(SMILEI_ACCELERATOR_GPU) virtual double norm2OnDevice( unsigned int istart[3][2], unsigned int bufsize[3][2] ) override final; #endif diff --git a/src/Field/cField2D.cpp b/src/Field/cField2D.cpp index e1ca5560a..57ff6ea81 100755 --- a/src/Field/cField2D.cpp +++ b/src/Field/cField2D.cpp @@ -219,7 +219,7 @@ double cField2D::norm2( unsigned int istart[3][2], unsigned int bufsize[3][2] ) } //! Perform the norm2 on Device -#if defined(SMILEI_ACCELERATOR_MODE) +#if defined(SMILEI_ACCELERATOR_GPU) double cField2D::norm2OnDevice( unsigned int istart[3][2], unsigned int bufsize[3][2] ) { ERROR("Not implemented"); diff --git a/src/Field/cField2D.h b/src/Field/cField2D.h index d447d4f2e..26ee995c9 100755 --- a/src/Field/cField2D.h +++ b/src/Field/cField2D.h @@ -84,7 +84,7 @@ class cField2D : public cField virtual double norm2( unsigned int istart[3][2], unsigned int bufsize[3][2] ) override; //! Compute the norm2OnDevice of the field -#if defined(SMILEI_ACCELERATOR_MODE) +#if defined(SMILEI_ACCELERATOR_GPU) virtual double norm2OnDevice( unsigned int istart[3][2], unsigned int bufsize[3][2] ) override final; #endif diff --git a/src/Field/cField3D.cpp b/src/Field/cField3D.cpp index 84510f401..f4249e134 100755 --- a/src/Field/cField3D.cpp +++ b/src/Field/cField3D.cpp @@ -218,7 +218,7 @@ double cField3D::norm2( unsigned int istart[3][2], unsigned int bufsize[3][2] ) } //! Perform the norm2 on Device -#if defined(SMILEI_ACCELERATOR_MODE) +#if defined(SMILEI_ACCELERATOR_GPU) double cField3D::norm2OnDevice( unsigned int istart[3][2], unsigned int bufsize[3][2] ) { ERROR("Not implemented"); diff --git a/src/Field/cField3D.h b/src/Field/cField3D.h index a81f293fc..0db1f6835 100755 --- a/src/Field/cField3D.h +++ b/src/Field/cField3D.h @@ -84,7 +84,7 @@ class cField3D : public cField virtual double norm2( unsigned int istart[3][2], unsigned int bufsize[3][2] ) override; //! Compute the norm2OnDevice of the field -#if defined(SMILEI_ACCELERATOR_MODE) +#if defined(SMILEI_ACCELERATOR_GPU) virtual double norm2OnDevice( unsigned int istart[3][2], unsigned int bufsize[3][2] ) override final; #endif diff --git a/src/Interpolator/Interpolator1D.cpp b/src/Interpolator/Interpolator1D.cpp index e10b611bd..cdf84992c 100755 --- a/src/Interpolator/Interpolator1D.cpp +++ b/src/Interpolator/Interpolator1D.cpp @@ -11,7 +11,7 @@ Interpolator1D::Interpolator1D( Patch *patch ) : Interpolator() { - index_domain_begin = patch->getCellStartingGlobalIndex( 0 ); + i_domain_begin_ = patch->getCellStartingGlobalIndex( 0 ); } diff --git a/src/Interpolator/Interpolator1D.h b/src/Interpolator/Interpolator1D.h index c1324e0a3..408b6ac3a 100755 --- a/src/Interpolator/Interpolator1D.h +++ b/src/Interpolator/Interpolator1D.h @@ -22,7 +22,7 @@ class Interpolator1D : public Interpolator protected: //! 
Inverse of the spatial-step double dx_inv_; - unsigned int index_domain_begin; + unsigned int i_domain_begin_; }; #endif diff --git a/src/Interpolator/Interpolator1D2Order.cpp b/src/Interpolator/Interpolator1D2Order.cpp index e867b29be..f85e735de 100755 --- a/src/Interpolator/Interpolator1D2Order.cpp +++ b/src/Interpolator/Interpolator1D2Order.cpp @@ -8,13 +8,11 @@ #include "Particles.h" #include "LaserEnvelope.h" - using namespace std; Interpolator1D2Order::Interpolator1D2Order( Params ¶ms, Patch *patch ) : Interpolator1D( patch ) { dx_inv_ = 1.0/params.cell_length[0]; - } // --------------------------------------------------------------------------------------------------------------------- @@ -23,31 +21,45 @@ Interpolator1D2Order::Interpolator1D2Order( Params ¶ms, Patch *patch ) : Int void Interpolator1D2Order::fields( ElectroMagn *EMfields, Particles &particles, int ipart, int nparts, double *ELoc, double *BLoc ) { // Static cast of the electromagnetic fields - Field1D *Ex1D = static_cast( EMfields->Ex_ ); - Field1D *Ey1D = static_cast( EMfields->Ey_ ); - Field1D *Ez1D = static_cast( EMfields->Ez_ ); - Field1D *Bx1D_m = static_cast( EMfields->Bx_m ); - Field1D *By1D_m = static_cast( EMfields->By_m ); - Field1D *Bz1D_m = static_cast( EMfields->Bz_m ); + Field1D *Ex1D = static_cast( EMfields->Ex_ ); + Field1D *Ey1D = static_cast( EMfields->Ey_ ); + Field1D *Ez1D = static_cast( EMfields->Ez_ ); + Field1D *Bx1D = static_cast( EMfields->Bx_m ); + Field1D *By1D = static_cast( EMfields->By_m ); + Field1D *Bz1D = static_cast( EMfields->Bz_m ); // Particle position (in units of the spatial-step) - double xpn = particles.position( 0, ipart )*dx_inv_; + double xpn = particles.position( 0, ipart ) * dx_inv_; // Calculate coeffs int idx_p[1], idx_d[1]; double delta_p[1]; double coeffxp[3]; double coeffxd[3]; + coeffs( xpn, idx_p, idx_d, coeffxp, coeffxd, delta_p ); + // Interpolation of Ex^(d) + /*ELoc[0*nparts+ipart] = compute( &coeffxd[0], Ex1D, idx_d[0] ); + // Interpolation of Ey^(p) + ELoc[1*nparts+ipart] = compute( &coeffxp[0], Ey1D, idx_p[0] ); + // Interpolation of Ez^(p) + ELoc[2*nparts+ipart] = compute( &coeffxp[0], Ez1D, idx_p[0] ); + // Interpolation of Bx^(p) + BLoc[0*nparts+ipart] = compute( &coeffxp[0], Bx1D, idx_p[0] ); + // Interpolation of By^(d) + BLoc[1*nparts+ipart] = compute( &coeffxd[0], By1D, idx_d[0] ); + // Interpolation of Bz^(d) + BLoc[2*nparts+ipart] = compute( &coeffxd[0], Bz1D, idx_d[0] );*/ + // Interpolate the fields from the Dual grid : Ex, By, Bz - *( ELoc+0*nparts ) = compute( coeffxd, Ex1D, idx_d[0] ); - *( BLoc+1*nparts ) = compute( coeffxd, By1D_m, idx_d[0] ); - *( BLoc+2*nparts ) = compute( coeffxd, Bz1D_m, idx_d[0] ); + *( ELoc+0*nparts ) = compute( coeffxd, Ex1D, idx_d[0] ); + *( BLoc+1*nparts ) = compute( coeffxd, By1D, idx_d[0] ); + *( BLoc+2*nparts ) = compute( coeffxd, Bz1D, idx_d[0] ); // Interpolate the fields from the Primal grid : Ey, Ez, Bx - *( ELoc+1*nparts ) = compute( coeffxp, Ey1D, idx_p[0] ); - *( ELoc+2*nparts ) = compute( coeffxp, Ez1D, idx_p[0] ); - *( BLoc+0*nparts ) = compute( coeffxp, Bx1D_m, idx_p[0] ); + *( ELoc+1*nparts ) = compute( coeffxp, Ey1D, idx_p[0] ); + *( ELoc+2*nparts ) = compute( coeffxp, Ez1D, idx_p[0] ); + *( BLoc+0*nparts ) = compute( coeffxp, Bx1D, idx_p[0] ); }//END Interpolator1D2Order @@ -65,16 +77,16 @@ void Interpolator1D2Order::fieldsAndCurrents( ElectroMagn *EMfields, Particles & } // Static cast of the electromagnetic fields - Field1D *Ex1D = static_cast( EMfields->Ex_ ); - Field1D *Ey1D = static_cast( 
EMfields->Ey_ ); - Field1D *Ez1D = static_cast( EMfields->Ez_ ); - Field1D *Bx1D_m = static_cast( EMfields->Bx_m ); - Field1D *By1D_m = static_cast( EMfields->By_m ); - Field1D *Bz1D_m = static_cast( EMfields->Bz_m ); - Field1D *Jx1D = static_cast( EMfields->Jx_ ); - Field1D *Jy1D = static_cast( EMfields->Jy_ ); - Field1D *Jz1D = static_cast( EMfields->Jz_ ); - Field1D *Rho1D = static_cast( EMfields->rho_ ); + Field1D *Ex1D = static_cast( EMfields->Ex_ ); + Field1D *Ey1D = static_cast( EMfields->Ey_ ); + Field1D *Ez1D = static_cast( EMfields->Ez_ ); + Field1D *Bx1D = static_cast( EMfields->Bx_m ); + Field1D *By1D = static_cast( EMfields->By_m ); + Field1D *Bz1D = static_cast( EMfields->Bz_m ); + Field1D *Jx1D = static_cast( EMfields->Jx_ ); + Field1D *Jy1D = static_cast( EMfields->Jy_ ); + Field1D *Jz1D = static_cast( EMfields->Jz_ ); + Field1D *Rho1D = static_cast( EMfields->rho_ ); Field1D *By1DBTIS3; Field1D *Bz1DBTIS3; if (smpi->use_BTIS3){ @@ -89,19 +101,34 @@ void Interpolator1D2Order::fieldsAndCurrents( ElectroMagn *EMfields, Particles & double delta_p[1]; double coeffxp[3]; double coeffxd[3]; + coeffs( xpn, idx_p, idx_d, coeffxp, coeffxd, delta_p ); int nparts( particles.numberOfParticles() ); + // Interpolation of Ex^(d) + /*ELoc[0*nparts+ipart] = compute( &coeffxd[0], Ex1D, idx_d[0] ); + // Interpolation of Ey^(p) + ELoc[1*nparts+ipart] = compute( &coeffxp[0], Ey1D, idx_p[0] ); + // Interpolation of Ez^(p) + ELoc[2*nparts+ipart] = compute( &coeffxp[0], Ez1D, idx_p[0] ); + // Interpolation of Bx^(p) + BLoc[0*nparts+ipart] = compute( &coeffxp[0], Bx1D, idx_p[0] ); + // Interpolation of By^(d) + BLoc[1*nparts+ipart] = compute( &coeffxd[0], By1D, idx_d[0] ); + // Interpolation of Bz^(d) + BLoc[2*nparts+ipart] = compute( &coeffxd[0], Bz1D, idx_d[0] );*/ + + // Interpolate the fields from the Dual grid : Ex, By, Bz - *( ELoc+0*nparts ) = compute( coeffxd, Ex1D, idx_d[0] ); - *( BLoc+1*nparts ) = compute( coeffxd, By1D_m, idx_d[0] ); - *( BLoc+2*nparts ) = compute( coeffxd, Bz1D_m, idx_d[0] ); + *( ELoc+0*nparts ) = compute( &coeffxd[0], Ex1D, idx_d[0] ); + *( BLoc+1*nparts ) = compute( &coeffxd[0], By1D, idx_d[0] ); + *( BLoc+2*nparts ) = compute( &coeffxd[0], Bz1D, idx_d[0] ); // Interpolate the fields from the Primal grid : Ey, Ez, Bx - *( ELoc+1*nparts ) = compute( coeffxp, Ey1D, idx_p[0] ); - *( ELoc+2*nparts ) = compute( coeffxp, Ez1D, idx_p[0] ); - *( BLoc+0*nparts ) = compute( coeffxp, Bx1D_m, idx_p[0] ); + *( ELoc+1*nparts ) = compute( &coeffxp[0], Ey1D, idx_p[0] ); + *( ELoc+2*nparts ) = compute( &coeffxp[0], Ez1D, idx_p[0] ); + *( BLoc+0*nparts ) = compute( &coeffxp[0], Bx1D, idx_p[0] ); // Interpolate the fields from the Primal grid : Jy, Jz, Rho JLoc->y = compute( coeffxp, Jy1D, idx_p[0] ); @@ -115,7 +142,6 @@ void Interpolator1D2Order::fieldsAndCurrents( ElectroMagn *EMfields, Particles & *( BLocyBTIS3+0*nparts ) = compute( coeffxp, By1DBTIS3, idx_p[0] ); *( BLoczBTIS3+0*nparts ) = compute( coeffxp, Bz1DBTIS3, idx_p[0] ); } - } // Interpolator on another field than the basic ones @@ -127,7 +153,7 @@ void Interpolator1D2Order::oneField( Field **field, Particles &particles, int *i double coeffxp[3]; double coeffxd[3]; double *coeff = F->isDual( 0 ) ? coeffxd : coeffxp; - int *i = F->isDual( 0 ) ? &idx_d[0] : &idx_p[0]; + int *i = F->isDual( 0 ) ? 
&idx_d[0] : &idx_p[0]; for( int ipart=*istart ; ipart<*iend; ipart++ ) { double xpn = particles.position( 0, ipart )*dx_inv_; @@ -136,26 +162,133 @@ void Interpolator1D2Order::oneField( Field **field, Particles &particles, int *i } } -void Interpolator1D2Order::fieldsWrapper( ElectroMagn *EMfields, Particles &particles, SmileiMPI *smpi, int *istart, int *iend, int ithread, unsigned int, int ) +void Interpolator1D2Order::fieldsWrapper( ElectroMagn *EMfields, + Particles &particles, SmileiMPI *smpi, + int *istart, int *iend, int ithread, unsigned int, int ) { - double *Epart = &( smpi->dynamics_Epart[ithread][0] ); - double *Bpart = &( smpi->dynamics_Bpart[ithread][0] ); - int *iold = &( smpi->dynamics_iold[ithread][0] ); - double *delta = &( smpi->dynamics_deltaold[ithread][0] ); + double *const __restrict__ ELoc = smpi->dynamics_Epart[ithread].data(); + double *const __restrict__ BLoc = smpi->dynamics_Bpart[ithread].data(); - // Static cast of the electromagnetic fields - Field1D *Ex1D = static_cast( EMfields->Ex_ ); - Field1D *Ey1D = static_cast( EMfields->Ey_ ); - Field1D *Ez1D = static_cast( EMfields->Ez_ ); - Field1D *Bx1D = static_cast( EMfields->Bx_m ); - Field1D *By1D = static_cast( EMfields->By_m ); - Field1D *Bz1D = static_cast( EMfields->Bz_m ); + int *const __restrict__ iold = smpi->dynamics_iold[ithread].data(); + double *const __restrict__ delta = smpi->dynamics_deltaold[ithread].data(); + const double *const __restrict__ position_x = particles.getPtrPosition( 0 ); + + // Static cast of the electromagnetic fields + const double *const __restrict__ Ex1D = static_cast( EMfields->Ex_ )->data(); + const double *const __restrict__ Ey1D = static_cast( EMfields->Ey_ )->data(); + const double *const __restrict__ Ez1D = static_cast( EMfields->Ez_ )->data(); + const double *const __restrict__ Bx1D = static_cast( EMfields->Bx_m )->data(); + const double *const __restrict__ By1D = static_cast( EMfields->By_m )->data(); + const double *const __restrict__ Bz1D = static_cast( EMfields->Bz_m )->data(); + +#if defined(SMILEI_ACCELERATOR_GPU_OACC) + const int sizeofEx = EMfields->Ex_->size(); + const int sizeofEy = EMfields->Ey_->size(); + const int sizeofEz = EMfields->Ez_->size(); + const int sizeofBx = EMfields->Bx_m->size(); + const int sizeofBy = EMfields->By_m->size(); + const int sizeofBz = EMfields->Bz_m->size(); +#endif //Loop on bin particles - int nparts = particles.numberOfParticles(); - - if (!smpi->use_BTIS3){ // without BTIS-3 interpolation + const int nparts = particles.numberOfParticles(); + const int first_index = *istart; + const int last_index = *iend; + double accdx_inv[2]; + accdx_inv[0]= dx_inv_; + + if (!smpi->use_BTIS3){ +#if defined( SMILEI_ACCELERATOR_GPU_OMP ) + #pragma omp target map( to : i_domain_begin_) is_device_ptr (position_x) + #pragma omp teams distribute parallel for +#elif defined(SMILEI_ACCELERATOR_GPU_OACC) + #pragma acc enter data create(this) + #pragma acc update device(this) + size_t interpolation_range_size = ( last_index + 0 * nparts ) - first_index; + #pragma acc parallel present(ELoc [first_index:interpolation_range_size],\ + BLoc [first_index:interpolation_range_size],\ + iold [first_index:interpolation_range_size],\ + delta [first_index:interpolation_range_size],\ + Ex1D [0:sizeofEx],\ + Ey1D [0:sizeofEy],\ + Ez1D [0:sizeofEz],\ + Bx1D [0:sizeofBx],\ + By1D [0:sizeofBy],\ + Bz1D [0:sizeofBz])\ + deviceptr(position_x) \ + copyin(accdx_inv[0:2]) //copyin(dx_inv_[:1]) //copyin(dx_inv_) + #pragma acc loop gang worker vector +#endif + for( int ipart 
= first_index; ipart < last_index; ipart++ ) { + // Normalized particle position + const double xpn = position_x[ipart] * accdx_inv[0]; + // Calculate coeffs + int idx_p[1], idx_d[1]; + double delta_p[1]; + double coeffxp[3]; + double coeffxd[3]; + + coeffs( xpn, idx_p, idx_d, coeffxp, coeffxd, delta_p ); + + // Interpolation of Ex^(d) + ELoc[0*nparts+ipart] = compute( &coeffxd[0], Ex1D, idx_d[0] ); + // Interpolation of Ey^(p) + ELoc[1*nparts+ipart] = compute( &coeffxp[0], Ey1D, idx_p[0] ); + // Interpolation of Ez^(p) + ELoc[2*nparts+ipart] = compute( &coeffxp[0], Ez1D, idx_p[0] ); + // Interpolation of Bx^(p) + BLoc[0*nparts+ipart] = compute( &coeffxp[0], Bx1D, idx_p[0] ); + // Interpolation of By^(d) + BLoc[1*nparts+ipart] = compute( &coeffxd[0], By1D, idx_d[0] ); + // Interpolation of Bz^(d) + BLoc[2*nparts+ipart] = compute( &coeffxd[0], Bz1D, idx_d[0] ); + + //Buffering of iol and delta + iold[0*nparts+ipart] = idx_p[0]; + delta[0*nparts+ipart] = delta_p[0]; + + } // end ipart loop + #if defined(SMILEI_ACCELERATOR_GPU_OACC) + #pragma acc exit data delete(this) + #endif + + }else { + + double *BypartBTIS3 = &( smpi->dynamics_Bpart_yBTIS3[ithread][0] ); + double *BzpartBTIS3 = &( smpi->dynamics_Bpart_zBTIS3[ithread][0] ); //*/ + //double *const __restrict__ BypartBTIS3 = smpi->dynamics_Bpart_yBTIS3[ithread].data(); + //double *const __restrict__ BzpartBTIS3 = smpi->dynamics_Bpart_zBTIS3[ithread].data(); + + const double *const __restrict__ By1D_mBTIS3 = static_cast( EMfields->By_mBTIS3 )->data(); + const double *const __restrict__ Bz1D_mBTIS3 = static_cast( EMfields->Bz_mBTIS3 )->data(); + +/* +#if defined( SMILEI_ACCELERATOR_GPU_OMP ) + #pragma omp target map( to : i_domain_begin_) is_device_ptr ( position_x) + #pragma omp teams distribute parallel for +#elif defined(SMILEI_ACCELERATOR_GPU_OACC) + #pragma acc enter data create(this) + #pragma acc update device(this) + size_t interpolation_range_size = ( last_index + 0 * nparts ) - first_index; + #pragma acc parallel present(ELoc [first_index:interpolation_range_size],\ + BLoc [first_index:interpolation_range_size],\ + BypartBTIS3 [first_index:interpolation_range_size],\ + BzpartBTIS3 [first_index:interpolation_range_size],\ + iold [first_index:interpolation_range_size],\ + delta [first_index:interpolation_range_size],\ + Ex1D [0:sizeofEx],\ + Ey1D [0:sizeofEy],\ + Ez1D [0:sizeofEz],\ + Bx1D [0:sizeofBx],\ + By1D [0:sizeofBy],\ + Bz1D [0:sizeofBz],\ + By1D_mBTIS3 [0:sizeofEz],\ + Bz1D_mBTIS3 [0:sizeofEy])\ + deviceptr(position_x) \ + copyin(d_inv_) + #pragma acc loop gang worker vector +#endif //*/ for (int ipart=*istart; ipart < *iend; ipart++){ // Normalized particle position @@ -166,70 +299,39 @@ void Interpolator1D2Order::fieldsWrapper( ElectroMagn *EMfields, Particles &part double delta_p[1]; double coeffxp[3]; double coeffxd[3]; + coeffs( xpn, idx_p, idx_d, coeffxp, coeffxd, delta_p ); // Interpolation of Ex^(d) - *( Epart+0*nparts+ipart ) = compute( coeffxd, Ex1D, idx_d[0] ); + ELoc[0*nparts+ipart] = compute( coeffxd, Ex1D, idx_d[0] ); // Interpolation of Ey^(p) - *( Epart+1*nparts+ipart ) = compute( coeffxp, Ey1D, idx_p[0] ); + ELoc[1*nparts+ipart] = compute( coeffxp, Ey1D, idx_p[0] ); // Interpolation of Ez^(p) - *( Epart+2*nparts+ipart ) = compute( coeffxp, Ez1D, idx_p[0] ); + ELoc[2*nparts+ipart] = compute( coeffxp, Ez1D, idx_p[0] ); // Interpolation of Bx^(p) - *( Bpart+0*nparts+ipart ) = compute( coeffxp, Bx1D, idx_p[0] ); + BLoc[0*nparts+ipart] = compute( coeffxp, Bx1D, idx_p[0] ); // Interpolation of By^(d) - *( 
Bpart+1*nparts+ipart ) = compute( coeffxd, By1D, idx_d[0] ); + BLoc[1*nparts+ipart] = compute( coeffxd, By1D, idx_d[0] ); // Interpolation of Bz^(d) - *( Bpart+2*nparts+ipart ) = compute( coeffxd, Bz1D, idx_d[0] ); + BLoc[2*nparts+ipart] = compute( coeffxd, Bz1D, idx_d[0] ); + // Interpolation of ByBTIS3^(p) + *( BypartBTIS3+0*nparts+ipart ) = compute( coeffxp, By1D_mBTIS3, idx_p[0] ); + // Interpolation of BzBTIS3^(p) + *( BzpartBTIS3+0*nparts+ipart ) = compute( coeffxp, Bz1D_mBTIS3, idx_p[0] ); + // Interpolation of ByBTIS3^(p) + //BypartBTIS3[0*nparts+ipart ] = compute( coeffxp, By1D_mBTIS3, idx_p[0] ); + // Interpolation of BzBTIS3^(p) + //BzpartBTIS3[0*nparts+ipart ] = compute( coeffxp, Bz1D_mBTIS3, idx_p[0] ); //Buffering of iol and delta - *( iold+0*nparts+ipart) = idx_p[0]; - *( delta+0*nparts+ipart) = delta_p[0]; - + iold[0*nparts+ipart] = idx_p[0]; + delta[0*nparts+ipart] = delta_p[0]; } // end ipart loop - } else { // with B-TIS3 interpolation - - Field1D *By1D_mBTIS3 = static_cast( EMfields->By_mBTIS3 ); - Field1D *Bz1D_mBTIS3 = static_cast( EMfields->Bz_mBTIS3 ); - double *BypartBTIS3 = &( smpi->dynamics_Bpart_yBTIS3[ithread][0] ); - double *BzpartBTIS3 = &( smpi->dynamics_Bpart_zBTIS3[ithread][0] ); - - for (int ipart=*istart; ipart < *iend; ipart++){ - - // Normalized particle position - double xpn = particles.position( 0, ipart )*dx_inv_; - - // Calculate coeffs - int idx_p[1], idx_d[1]; - double delta_p[1]; - double coeffxp[3]; - double coeffxd[3]; - - coeffs( xpn, idx_p, idx_d, coeffxp, coeffxd, delta_p ); - - // Interpolation of Ex^(d) - *( Epart+0*nparts+ipart ) = compute( coeffxd, Ex1D, idx_d[0] ); - // Interpolation of Ey^(p) - *( Epart+1*nparts+ipart ) = compute( coeffxp, Ey1D, idx_p[0] ); - // Interpolation of Ez^(p) - *( Epart+2*nparts+ipart ) = compute( coeffxp, Ez1D, idx_p[0] ); - // Interpolation of Bx^(p) - *( Bpart+0*nparts+ipart ) = compute( coeffxp, Bx1D, idx_p[0] ); - // Interpolation of By^(d) - *( Bpart+1*nparts+ipart ) = compute( coeffxd, By1D, idx_d[0] ); - // Interpolation of Bz^(d) - *( Bpart+2*nparts+ipart ) = compute( coeffxd, Bz1D, idx_d[0] ); - // Interpolation of ByBTIS3^(p) - *( BypartBTIS3+0*nparts ) = compute( coeffxp, By1D_mBTIS3, idx_p[0] ); - // Interpolation of BzBTIS3^(p) - *( BzpartBTIS3+0*nparts ) = compute( coeffxp, Bz1D_mBTIS3, idx_p[0] ); - - //Buffering of iol and delta - *( iold+0*nparts+ipart) = idx_p[0]; - *( delta+0*nparts+ipart) = delta_p[0]; - - } // end ipart loop - - } + #if defined(SMILEI_ACCELERATOR_GPU_OACC) + #pragma acc exit data delete(this) + #endif + } // end with B-TIS interpolation + } // Interpolator specific to tracked particles. 
A selection of particles may be provided @@ -350,9 +452,9 @@ void Interpolator1D2Order::fieldsAndEnvelope( ElectroMagn *EMfields, Particles & // Interpolation of Bz^(d) *( Bpart+2*nparts+ipart ) = compute( coeffxd, Bz1D, idx_d[0] ); // Interpolation of ByBTIS3^(p) - *( BypartBTIS3+0*nparts ) = compute( coeffxp, By1D_mBTIS3, idx_p[0] ); + *( BypartBTIS3+0*nparts) = compute( coeffxp, By1D_mBTIS3, idx_p[0] ); // Interpolation of BzBTIS3^(p) - *( BzpartBTIS3+0*nparts ) = compute( coeffxp, Bz1D_mBTIS3, idx_p[0] ); + *( BzpartBTIS3+0*nparts) = compute( coeffxp, Bz1D_mBTIS3, idx_p[0] ); // Interpolation of Phi^(p) *( PHIpart+0*nparts+ipart ) = compute( coeffxp, Phi1D, idx_p[0] ); // Interpolation of GradPhix^(p) @@ -394,11 +496,12 @@ void Interpolator1D2Order::timeCenteredEnvelope( ElectroMagn *EMfields, Particle // Calculate coeffs - int idx_p[1]; + int idx_p[1], idx_d[1]; double delta_p[1]; double coeffxp[3]; + double coeffxd[3]; - coeffs( xpn, idx_p, NULL, coeffxp, NULL, delta_p ); + coeffs( xpn, idx_p, idx_d, coeffxp, coeffxd, delta_p ); // Interpolation of Phi^(p) *( PHI_mpart+0*nparts+ipart ) = compute( coeffxp, Phi_m1D, idx_p[0] ); @@ -428,31 +531,46 @@ void Interpolator1D2Order::envelopeAndSusceptibility( ElectroMagn *EMfields, Par // Normalized particle position double xpn = particles.position( 0, ipart )*dx_inv_; - // Indexes of the central nodes - int idx_p[1]; - double delta_p[1]; + // Calculate coeffs double coeffxp[3]; - coeffs( xpn, idx_p, NULL, coeffxp, NULL, delta_p ); + + // Indexes of the central nodes + int ip = round( xpn ); + + // Declaration and calculation of the coefficient for interpolation + double deltax, delta2; + + deltax = xpn - ( double )ip; + delta2 = deltax*deltax; + coeffxp[0] = 0.5 * ( delta2-deltax+0.25 ); + coeffxp[1] = 0.75 - delta2; + coeffxp[2] = 0.5 * ( delta2+deltax+0.25 ); + + + //!\todo CHECK if this is correct for both primal & dual grids !!! 
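Editor's note: the coefficients that envelopeAndSusceptibility now computes inline are the standard 2nd-order (quadratic-spline) weights on three primal nodes, the same formulas used by coeffs(). The following stand-alone sketch is illustrative only (the helper name, the test field and the main() are not part of Smilei); it shows how the three weights are built from the normalized position and how a value is gathered from nodes ip-1, ip, ip+1.

#include <cmath>
#include <cstdio>

// Illustrative sketch: 2nd-order interpolation of a 1D primal field at xpn (in cell units).
// The weight formulas match the ones in the patch above; everything else is an assumption.
static double interp1d_order2( const double *field, double xpn, int i_domain_begin )
{
    const int    ip = static_cast<int>( std::round( xpn ) ); // central node (global index)
    const double d  = xpn - static_cast<double>( ip );       // normalized distance to that node
    const double d2 = d * d;

    const double c0 = 0.5 * ( d2 - d + 0.25 );
    const double c1 = 0.75 - d2;
    const double c2 = 0.5 * ( d2 + d + 0.25 );                // c0 + c1 + c2 == 1 for any d

    const int i = ip - i_domain_begin;                        // shift to patch-local index
    return c0 * field[i-1] + c1 * field[i] + c2 * field[i+1];
}

int main()
{
    const double f[5] = { 0., 1., 2., 3., 4. };               // linear field => interpolation is exact
    std::printf( "%f\n", interp1d_order2( f, 2.3, 0 ) );      // prints 2.300000
}

Because the weights reproduce linear fields exactly, this kind of one-liner is a convenient sanity check when touching the coefficient code.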
+ // First index for summation + ip = ip - i_domain_begin_; // ------------------------- // Interpolation of Env_A_abs_^(p) // ------------------------- - *( Env_A_abs_Loc ) = compute( coeffxp, Env_A_abs_1D, idx_p[0] ); + *( Env_A_abs_Loc ) = compute( coeffxp, Env_A_abs_1D, ip ); //compute( &coeffp_[1], Env_A_abs_1D, ip_ ); // ------------------------- // Interpolation of Env_Chi_^(p) // ------------------------- - *( Env_Chi_Loc ) = compute( coeffxp, Env_Chi_1D, idx_p[0] ); + *( Env_Chi_Loc ) = compute( coeffxp, Env_Chi_1D, ip ); //compute( &coeffp_[1], Env_Chi_1D, ip_ ); // ------------------------- // Interpolation of Env_E_abs_^(p) // ------------------------- - *( Env_E_abs_Loc ) = compute( coeffxp, Env_E_abs_1D, idx_p[0] ); + *( Env_E_abs_Loc ) = compute( coeffxp, Env_E_abs_1D, ip ); // compute( &coeffp_[1], Env_E_abs_1D, ip_ ); // ------------------------- // Interpolation of Env_Ex_abs_^(p) // ------------------------- - *( Env_Ex_abs_Loc ) = compute( coeffxp, Env_Ex_abs_1D, idx_p[0] ); + *( Env_Ex_abs_Loc ) = compute( coeffxp, Env_Ex_abs_1D, ip ); // compute( &coeffp_[1], Env_Ex_abs_1D, ip_ ); + } // END Interpolator1D2Order @@ -466,13 +584,26 @@ void Interpolator1D2Order::envelopeFieldForIonization( ElectroMagn *EMfields, Pa //Loop on bin particles for( int ipart=*istart ; ipart<*iend; ipart++ ) { - // Normalized particle position - double xpn = particles.position( 0, ipart )*dx_inv_; - int idx_p[1]; double delta_p[1]; double coeffxp[3]; - coeffs( xpn, idx_p, NULL, coeffxp, NULL, delta_p ); + + // Normalized particle position + double xpn = particles.position( 0, ipart )*dx_inv_; + + double delta2; + + // Primal + idx_p[0] = round( xpn ); // index of the central point + delta_p[0] = xpn -( double )idx_p[0]; // normalized distance to the central node + delta2 = pow( delta_p[0], 2 ); // square of the normalized distance to the central node + + // 2nd order interpolation on 3 nodes + coeffxp[0] = 0.5 * ( delta2-delta_p[0]+0.25 ); + coeffxp[1] = ( 0.75-delta2 ); + coeffxp[2] = 0.5 * ( delta2+delta_p[0]+0.25 ); + + idx_p[0] -= i_domain_begin_; // --------------------------------- // Interpolation of Env_E_abs^(p) diff --git a/src/Interpolator/Interpolator1D2Order.h b/src/Interpolator/Interpolator1D2Order.h index 268e33b5a..44e6651d4 100755 --- a/src/Interpolator/Interpolator1D2Order.h +++ b/src/Interpolator/Interpolator1D2Order.h @@ -5,6 +5,8 @@ #include "Interpolator1D.h" #include "Field1D.h" +#include "gpu.h" + // -------------------------------------------------------------------------------------------------------------------- //! 
Class for 2nd order interpolator for 1Dcartesian simulations // -------------------------------------------------------------------------------------------------------------------- @@ -13,7 +15,7 @@ class Interpolator1D2Order final : public Interpolator1D public: Interpolator1D2Order( Params &, Patch * ); - ~Interpolator1D2Order() override final {}; + ~Interpolator1D2Order() override {}; //final inline void __attribute__((always_inline)) fields( ElectroMagn *EMfields, Particles &particles, int ipart, int nparts, double *ELoc, double *BLoc ); inline void __attribute__((always_inline)) fieldsForTasks( ElectroMagn *EMfields, Particles &particles, int ipart, int nparts, double *ELoc, double *BLoc, int *iold, double *delta ); @@ -22,11 +24,23 @@ class Interpolator1D2Order final : public Interpolator1D void fieldsSelection( ElectroMagn *EMfields, Particles &particles, double *buffer, int offset, std::vector *selection ) override final; void oneField( Field **field, Particles &particles, int *istart, int *iend, double *FieldLoc, double *l1=NULL, double *l2=NULL, double *l3=NULL ) override final; - inline double __attribute__((always_inline)) compute( double *coeff, Field1D *f, int idx ) + inline double __attribute__((always_inline)) + compute( double *coeff, Field1D *f, int idx ) { double interp_res = coeff[0] * ( *f )( idx-1 ) + coeff[1] * ( *f )( idx ) + coeff[2] * ( *f )( idx+1 ); return interp_res; - }; + } + + SMILEI_ACCELERATOR_DECLARE_ROUTINE + static inline double __attribute__((always_inline)) + compute( const double *__restrict__ coeff, + const double *__restrict__ f, + int idx ) + { + double interp_res = coeff[0] * f[idx-1] + coeff[1] * f[idx] + coeff[2] * f[idx+1]; + return interp_res; + } + SMILEI_ACCELERATOR_DECLARE_ROUTINE_END void fieldsAndEnvelope( ElectroMagn *EMfields, Particles &particles, SmileiMPI *smpi, int *istart, int *iend, int ithread, int ipart_ref = 0 ) override final; void timeCenteredEnvelope( ElectroMagn *EMfields, Particles &particles, SmileiMPI *smpi, int *istart, int *iend, int ithread, int ipart_ref = 0 ) override final; @@ -34,39 +48,39 @@ class Interpolator1D2Order final : public Interpolator1D void envelopeFieldForIonization( ElectroMagn *EMfields, Particles &particles, SmileiMPI *smpi, int *istart, int *iend, int ithread, int ipart_ref = 0 ) override final; private: - inline void coeffs( double xpn, int* idx_p, int* idx_d, - double *coeffxp, double *coeffxd, double* delta_p ) + + // 2nd order interpolation on 3 nodes + SMILEI_ACCELERATOR_DECLARE_ROUTINE + inline void __attribute__( ( always_inline ) ) + coeffs( double xpn, int* idx_p, int* idx_d, + double *coeffxp, double *coeffxd, double* delta_p ) const { double delta, delta2; - // Primal - idx_p[0] = round( xpn ); // index of the central point - delta_p[0] = xpn -( double )idx_p[0]; // normalized distance to the central node - delta2 = pow( delta_p[0], 2 ); // square of the normalized distance to the central node - - // 2nd order interpolation on 3 nodes - coeffxp[0] = 0.5 * ( delta2-delta_p[0]+0.25 ); - coeffxp[1] = ( 0.75-delta2 ); - coeffxp[2] = 0.5 * ( delta2+delta_p[0]+0.25 ); + // index of the central point + idx_p[0] = std::round( xpn ); + idx_d[0] = std::round( xpn + 0.5 ); + + delta = xpn - static_cast( idx_d[0] ) + 0.5; // normalized distance to the central node + delta2 = delta * delta; // square of the normalized distance to the central node - idx_p[0] -= index_domain_begin; + coeffxd[0] = 0.5 * ( delta2 - delta + 0.25 ); + coeffxd[1] = ( 0.75 - delta2 ); + coeffxd[2] = 0.5 * ( delta2 + delta 
+ 0.25 ); + + delta = xpn - static_cast( idx_p[0] ); + delta2 = delta * delta; // pow( delta_p[0], 2 ); // square of the normalized distance to the central node - if(idx_d){ - // Dual - idx_d[0] = round( xpn+0.5 ); // index of the central point - delta = xpn - ( double )idx_d[0] +0.5; // normalized distance to the central node - delta2 = delta*delta; // square of the normalized distance to the central node - - // 2nd order interpolation on 3 nodes - coeffxd[0] = 0.5 * ( delta2-delta+0.25 ); - coeffxd[1] = ( 0.75-delta2 ); - coeffxd[2] = 0.5 * ( delta2+delta+0.25 ); - - idx_d[0] -= index_domain_begin; - } + delta_p[0] = delta; // normalized distance to the central node + coeffxp[0] = 0.5 * ( delta2 - delta_p[0] + 0.25 ); + coeffxp[1] = ( 0.75 - delta2 ); + coeffxp[2] = 0.5 * ( delta2 + delta_p[0] + 0.25 ); + + idx_p[0] = idx_p[0] - i_domain_begin_; + idx_d[0] = idx_d[0] - i_domain_begin_; } - + SMILEI_ACCELERATOR_DECLARE_ROUTINE_END };//END class #endif diff --git a/src/Interpolator/Interpolator1D2OrderV.cpp b/src/Interpolator/Interpolator1D2OrderV.cpp old mode 100644 new mode 100755 index 31c3b7d4c..2b99cc66b --- a/src/Interpolator/Interpolator1D2OrderV.cpp +++ b/src/Interpolator/Interpolator1D2OrderV.cpp @@ -176,7 +176,7 @@ void Interpolator1D2OrderV::fieldsWrapper( ElectroMagn *EMfields, Particles &par coeffd[1] = ( 0.75-xjmxi2 ); coeffd[2] = 0.5 * ( xjmxi2+xjmxi+0.25 ); - idx -= index_domain_begin; + idx -= i_domain_begin_; // Primal ipx = round( xjn ); // index of the central point @@ -188,7 +188,7 @@ void Interpolator1D2OrderV::fieldsWrapper( ElectroMagn *EMfields, Particles &par coeffp[1] = ( 0.75-xjmxi2 ); coeffp[2] = 0.5 * ( xjmxi2+xjmxi+0.25 ); - ipx -= index_domain_begin; + ipx -= i_domain_begin_; // // Interpolate the fields from the Dual grid : Ex, By, Bz Epart_x[ipart] = coeffd[0] * Ex[idx-1] + coeffd[1] * Ex[idx] + coeffd[2] * Ex[idx+1]; @@ -329,7 +329,7 @@ void Interpolator1D2OrderV::timeCenteredEnvelope( ElectroMagn *EMfields, Particl //!\todo CHECK if this is correct for both primal & dual grids !!! // First index for summation - ip_ = ip_ - index_domain_begin; + ip_ = ip_ - i_domain_begin_; // ------------------------- // Interpolation of Phiold^(p) @@ -388,7 +388,7 @@ void Interpolator1D2OrderV::envelopeAndSusceptibility( ElectroMagn *EMfields, Pa //!\todo CHECK if this is correct for both primal & dual grids !!! // First index for summation - ip_ = ip_ - index_domain_begin; + ip_ = ip_ - i_domain_begin_; // ------------------------- // Interpolation of Env_A_abs_^(p) @@ -441,7 +441,7 @@ void Interpolator1D2OrderV::envelopeFieldForIonization( ElectroMagn *EMfields, P //!\todo CHECK if this is correct for both primal & dual grids !!! 
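Editor's note: the refactored coeffs() computes the dual-grid weights first (the dual grid is staggered by half a cell, so its central node is round(xpn + 0.5) and the distance is xpn - idx_d + 0.5) and then the primal weights. A quick stand-alone check of that construction is sketched below; the function and variable names are illustrative, not Smilei API.

#include <cassert>
#include <cmath>

// Mirrors the primal/dual weight construction from coeffs() above, outside the class.
static void coeffs1d_order2( double xpn, int &ip, int &id, double cp[3], double cd[3] )
{
    ip = static_cast<int>( std::round( xpn ) );         // primal central node
    id = static_cast<int>( std::round( xpn + 0.5 ) );   // dual grid shifted by half a cell

    double d  = xpn - static_cast<double>( id ) + 0.5;  // distance to the dual node
    double d2 = d * d;
    cd[0] = 0.5 * ( d2 - d + 0.25 );
    cd[1] = 0.75 - d2;
    cd[2] = 0.5 * ( d2 + d + 0.25 );

    d  = xpn - static_cast<double>( ip );                // distance to the primal node
    d2 = d * d;
    cp[0] = 0.5 * ( d2 - d + 0.25 );
    cp[1] = 0.75 - d2;
    cp[2] = 0.5 * ( d2 + d + 0.25 );
}

int main()
{
    for( double xpn = 0.0; xpn < 10.0; xpn += 0.01 ) {
        int ip, id;
        double cp[3], cd[3];
        coeffs1d_order2( xpn, ip, id, cp, cd );
        assert( std::abs( cp[0] + cp[1] + cp[2] - 1.0 ) < 1e-12 );  // partition of unity (primal)
        assert( std::abs( cd[0] + cd[1] + cd[2] - 1.0 ) < 1e-12 );  // partition of unity (dual)
    }
}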
// First index for summation - ip_ = ip_ - index_domain_begin; + ip_ = ip_ - i_domain_begin_; // --------------------------------- // Interpolation of Env_E_abs^(p) diff --git a/src/Interpolator/Interpolator1D2OrderV.h b/src/Interpolator/Interpolator1D2OrderV.h old mode 100644 new mode 100755 index b7dce6588..7c72f9ca2 --- a/src/Interpolator/Interpolator1D2OrderV.h +++ b/src/Interpolator/Interpolator1D2OrderV.h @@ -48,7 +48,7 @@ class Interpolator1D2OrderV final : public Interpolator1D coeffd_[1] = ( 0.75-xjmxi2 ); coeffd_[2] = 0.5 * ( xjmxi2+xjmxi+0.25 ); - id_ -= index_domain_begin; + id_ -= i_domain_begin_; // Primal ip_ = round( xjn ); // index of the central point @@ -60,7 +60,7 @@ class Interpolator1D2OrderV final : public Interpolator1D coeffp_[1] = ( 0.75-xjmxi2 ); coeffp_[2] = 0.5 * ( xjmxi2+xjmxi+0.25 ); - ip_ -= index_domain_begin; + ip_ -= i_domain_begin_; } // Last prim index computed diff --git a/src/Interpolator/Interpolator1D3Order.h b/src/Interpolator/Interpolator1D3Order.h index e9c821925..3228ed39b 100755 --- a/src/Interpolator/Interpolator1D3Order.h +++ b/src/Interpolator/Interpolator1D3Order.h @@ -42,7 +42,7 @@ class Interpolator1D3Order final : public Interpolator1D coeffd_[2] = dble_1ov6 + 0.5*( xi+xi2-xi3 ); coeffd_[3] = xi3*dble_1ov6; - id_ -= index_domain_begin; + id_ -= i_domain_begin_; // Primal ip_ = ( int )xjn; // index of the 2nd node @@ -56,7 +56,7 @@ class Interpolator1D3Order final : public Interpolator1D coeffp_[2] = dble_1ov6 + 0.5*( xi+xi2-xi3 ); coeffp_[3] = xi3*dble_1ov6; - ip_ -= index_domain_begin; + ip_ -= i_domain_begin_; } inline void coeffs( double xpn, int* idx_p, int* idx_d, @@ -77,7 +77,7 @@ class Interpolator1D3Order final : public Interpolator1D coeffxd[2] = dble_1ov6 + 0.5*( xi+xi2-xi3 ); coeffxd[3] = xi3*dble_1ov6; - idx_d[0] -= index_domain_begin; + idx_d[0] -= i_domain_begin_; // Primal idx_p[0] = ( int )xpn; // index of the 2nd node @@ -92,7 +92,7 @@ class Interpolator1D3Order final : public Interpolator1D coeffxp[2] = dble_1ov6 + 0.5*( xi+xi2-xi3 ); coeffxp[3] = xi3*dble_1ov6; - idx_p[0] -= index_domain_begin; + idx_p[0] -= i_domain_begin_; } // Last prim index computed diff --git a/src/Interpolator/Interpolator1D4Order.h b/src/Interpolator/Interpolator1D4Order.h index f8bd48ee4..0e8831091 100755 --- a/src/Interpolator/Interpolator1D4Order.h +++ b/src/Interpolator/Interpolator1D4Order.h @@ -53,7 +53,7 @@ class Interpolator1D4Order final : public Interpolator1D coeffxp[3] = dble_19_ov_96 + dble_11_ov_24 * delta_p[0] + dble_1_ov_4 * delta2 - dble_1_ov_6 * delta3 - dble_1_ov_6 * delta4; coeffxp[4] = dble_1_ov_384 + dble_1_ov_48 * delta_p[0] + dble_1_ov_16 * delta2 + dble_1_ov_12 * delta3 + dble_1_ov_24 * delta4; - idx_p[0] -= index_domain_begin; + idx_p[0] -= i_domain_begin_; if(idx_d){ // Dual @@ -70,7 +70,7 @@ class Interpolator1D4Order final : public Interpolator1D coeffxd[3] = dble_19_ov_96 + dble_11_ov_24 * delta + dble_1_ov_4 * delta2 - dble_1_ov_6 * delta3 - dble_1_ov_6 * delta4; coeffxd[4] = dble_1_ov_384 + dble_1_ov_48 * delta + dble_1_ov_16 * delta2 + dble_1_ov_12 * delta3 + dble_1_ov_24 * delta4; - idx_d[0] -= index_domain_begin; + idx_d[0] -= i_domain_begin_; } } diff --git a/src/Interpolator/Interpolator1DWT2Order.cpp b/src/Interpolator/Interpolator1DWT2Order.cpp index 2ba3881b5..4bc058096 100755 --- a/src/Interpolator/Interpolator1DWT2Order.cpp +++ b/src/Interpolator/Interpolator1DWT2Order.cpp @@ -239,7 +239,7 @@ void Interpolator1DWT2Order::timeCenteredEnvelope( ElectroMagn *EMfields, Partic //!\todo CHECK if this is 
correct for both primal & dual grids !!! // First index for summation - ip_ = ip_ - index_domain_begin; + ip_ = ip_ - i_domain_begin_; // ------------------------- // Interpolation of Phiold^(p) @@ -298,7 +298,7 @@ void Interpolator1DWT2Order::envelopeAndSusceptibility( ElectroMagn *EMfields, P //!\todo CHECK if this is correct for both primal & dual grids !!! // First index for summation - ip_ = ip_ - index_domain_begin; + ip_ = ip_ - i_domain_begin_; // ------------------------- // Interpolation of Env_A_abs_^(p) @@ -351,7 +351,7 @@ void Interpolator1DWT2Order::envelopeFieldForIonization( ElectroMagn *EMfields, //!\todo CHECK if this is correct for both primal & dual grids !!! // First index for summation - ip_ = ip_ - index_domain_begin; + ip_ = ip_ - i_domain_begin_; // --------------------------------- // Interpolation of Env_E_abs^(p) diff --git a/src/Interpolator/Interpolator1DWT2Order.h b/src/Interpolator/Interpolator1DWT2Order.h index 19ea0ee7d..ff45230cf 100755 --- a/src/Interpolator/Interpolator1DWT2Order.h +++ b/src/Interpolator/Interpolator1DWT2Order.h @@ -47,7 +47,7 @@ class Interpolator1DWT2Order final : public Interpolator1D coeffd_[1] = ( 0.75-var1 ); coeffd_[2] = 0.5 * ( var1+xjmxi+0.25 ); - id_ -= index_domain_begin; + id_ -= i_domain_begin_; // Primal ip_ = round( xjn ); // index of the central point @@ -65,7 +65,7 @@ class Interpolator1DWT2Order final : public Interpolator1D coeffpt_[1] = 1.0 - 2.0 * var1; coeffpt_[2] = var1 + 0.5 * xjmxi; - ip_ -= index_domain_begin; + ip_ -= i_domain_begin_; } // Coefficients for WT diff --git a/src/Interpolator/Interpolator1DWT2OrderV.cpp b/src/Interpolator/Interpolator1DWT2OrderV.cpp index c64433035..40dd63589 100755 --- a/src/Interpolator/Interpolator1DWT2OrderV.cpp +++ b/src/Interpolator/Interpolator1DWT2OrderV.cpp @@ -178,7 +178,7 @@ void Interpolator1DWT2OrderV::fieldsWrapper( ElectroMagn *EMfields, Particles &p coeffd[1] = ( 0.75-var1 ); coeffd[2] = 0.5 * ( var1+xjmxi+0.25 ); - idx -= index_domain_begin; + idx -= i_domain_begin_; // Primal ipx = round( xjn ); // index of the central point @@ -190,7 +190,7 @@ void Interpolator1DWT2OrderV::fieldsWrapper( ElectroMagn *EMfields, Particles &p coeffpt[1] = 1.0 - 2.0 * var1; coeffpt[2] = var1 + 0.5 * xjmxi; - ipx -= index_domain_begin; + ipx -= i_domain_begin_; // // Interpolate the fields from the Dual grid : Ex, By, Bz Epart_x[ipart] = coeffd[0] * Ex[idx-1] + coeffd[1] * Ex[idx] + coeffd[2] * Ex[idx+1]; @@ -331,7 +331,7 @@ void Interpolator1DWT2OrderV::timeCenteredEnvelope( ElectroMagn *EMfields, Parti //!\todo CHECK if this is correct for both primal & dual grids !!! // First index for summation - ip_ = ip_ - index_domain_begin; + ip_ = ip_ - i_domain_begin_; // ------------------------- // Interpolation of Phiold^(p) @@ -390,7 +390,7 @@ void Interpolator1DWT2OrderV::envelopeAndSusceptibility( ElectroMagn *EMfields, //!\todo CHECK if this is correct for both primal & dual grids !!! // First index for summation - ip_ = ip_ - index_domain_begin; + ip_ = ip_ - i_domain_begin_; // ------------------------- // Interpolation of Env_A_abs_^(p) @@ -443,7 +443,7 @@ void Interpolator1DWT2OrderV::envelopeFieldForIonization( ElectroMagn *EMfields, //!\todo CHECK if this is correct for both primal & dual grids !!! 
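Editor's note: the index_domain_begin -> i_domain_begin_ change in all these interpolators is purely a naming-convention rename (trailing underscore for members); the member still holds the global index of the patch's first cell, as set from patch->getCellStartingGlobalIndex(0). The minimal sketch below, with illustrative names only, shows the local-index shift that every interpolator performs with it.

#include <cmath>

// Minimal sketch of the patch-local index shift; 'i_domain_begin_' stands for the value
// returned by patch->getCellStartingGlobalIndex(0), the rest is illustrative.
struct PatchLocalIndex1D
{
    double dx_inv_;          // inverse cell size
    int    i_domain_begin_;  // global index of the patch's first cell

    int primalIndex( double x ) const
    {
        const double xpn = x * dx_inv_;                                   // position in cell units
        return static_cast<int>( std::round( xpn ) ) - i_domain_begin_;  // patch-local node index
    }

    int dualIndex( double x ) const
    {
        const double xpn = x * dx_inv_;
        return static_cast<int>( std::round( xpn + 0.5 ) ) - i_domain_begin_;
    }
};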
// First index for summation - ip_ = ip_ - index_domain_begin; + ip_ = ip_ - i_domain_begin_; // --------------------------------- // Interpolation of Env_E_abs^(p) diff --git a/src/Interpolator/Interpolator1DWT2OrderV.h b/src/Interpolator/Interpolator1DWT2OrderV.h index 87a083fa5..4f20849c1 100755 --- a/src/Interpolator/Interpolator1DWT2OrderV.h +++ b/src/Interpolator/Interpolator1DWT2OrderV.h @@ -48,7 +48,7 @@ class Interpolator1DWT2OrderV final : public Interpolator1D coeffd_[1] = ( 0.75-var1 ); coeffd_[2] = 0.5 * ( var1+xjmxi+0.25 ); - id_ -= index_domain_begin; + id_ -= i_domain_begin_; // Primal ip_ = round( xjn ); // index of the central point @@ -66,7 +66,7 @@ class Interpolator1DWT2OrderV final : public Interpolator1D coeffpt_[1] = 1.0 - 2.0 * var1; coeffpt_[2] = var1 + 0.5 * xjmxi; - ip_ -= index_domain_begin; + ip_ -= i_domain_begin_; } // Coefficients for WT diff --git a/src/Interpolator/Interpolator1DWT4Order.h b/src/Interpolator/Interpolator1DWT4Order.h index dd5e78b13..6bc889885 100755 --- a/src/Interpolator/Interpolator1DWT4Order.h +++ b/src/Interpolator/Interpolator1DWT4Order.h @@ -55,7 +55,7 @@ class Interpolator1DWT4Order final : public Interpolator1D coeffd_[3] = dble_19_ov_96 + var1 + var3 * ( 1.5-xjmxi -var2 ); coeffd_[4] = dble_1_ov_24 * var5 * var5; - id_ -= index_domain_begin; + id_ -= i_domain_begin_; // Primal ip_ = round( xjn ); // index of the central point @@ -94,7 +94,7 @@ class Interpolator1DWT4Order final : public Interpolator1D coeffpt_[4] = var3 + var2 - var1; - ip_ -= index_domain_begin; + ip_ -= i_domain_begin_; } double dble_1_ov_6 ; diff --git a/src/Interpolator/Interpolator2D2Order.cpp b/src/Interpolator/Interpolator2D2Order.cpp index 0254294f5..795ab996d 100755 --- a/src/Interpolator/Interpolator2D2Order.cpp +++ b/src/Interpolator/Interpolator2D2Order.cpp @@ -180,7 +180,7 @@ void Interpolator2D2Order::fieldsWrapper( ElectroMagn *EMfields, const double *const __restrict__ By2D = static_cast( EMfields->By_m )->data(); const double *const __restrict__ Bz2D = static_cast( EMfields->Bz_m )->data(); -#if defined(SMILEI_OPENACC_MODE) +#if defined(SMILEI_ACCELERATOR_GPU_OACC) const int sizeofEx = EMfields->Ex_->size(); const int sizeofEy = EMfields->Ey_->size(); const int sizeofEz = EMfields->Ez_->size(); @@ -207,7 +207,7 @@ void Interpolator2D2Order::fieldsWrapper( ElectroMagn *EMfields, position_x /* [first_index:npart_range_size] */, \ position_y /* [first_index:npart_range_size] */ ) #pragma omp teams distribute parallel for -#elif defined(SMILEI_OPENACC_MODE) +#elif defined(SMILEI_ACCELERATOR_GPU_OACC) #pragma acc enter data create(this) #pragma acc update device(this) size_t interpolation_range_size = ( last_index + 1 * nparts ) - first_index; @@ -260,7 +260,7 @@ void Interpolator2D2Order::fieldsWrapper( ElectroMagn *EMfields, delta[1*nparts+ipart] = delta_p[1]; } - #if defined(SMILEI_OPENACC_MODE) + #if defined(SMILEI_ACCELERATOR_GPU_OACC) #pragma acc exit data delete(this) #endif } else{ // with B-TIS3 interpolation @@ -276,7 +276,7 @@ void Interpolator2D2Order::fieldsWrapper( ElectroMagn *EMfields, position_x /* [first_index:npart_range_size] */, \ position_y /* [first_index:npart_range_size] */ ) #pragma omp teams distribute parallel for -#elif defined(SMILEI_OPENACC_MODE) +#elif defined(SMILEI_ACCELERATOR_GPU_OACC) #pragma acc enter data create(this) #pragma acc update device(this) size_t interpolation_range_size = ( last_index + 1 * nparts ) - first_index; @@ -337,7 +337,7 @@ void Interpolator2D2Order::fieldsWrapper( ElectroMagn *EMfields, 
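Editor's note: the SMILEI_OPENACC_MODE -> SMILEI_ACCELERATOR_GPU_OACC (and SMILEI_ACCELERATOR_MODE -> SMILEI_ACCELERATOR_GPU) renames do not change the offload logic: the same particle loop is compiled three ways, with OpenMP target offload, with OpenACC, or as a plain host loop, depending on which guard is defined. The skeleton below uses only the macro names introduced by this patch; the function, its arguments and the loop body are placeholders, not Smilei code.

void gather_fields( double *__restrict__ out,
                    const double *__restrict__ field,
                    const double *__restrict__ position_x,
                    double dx_inv, int i_domain_begin,
                    int first, int last, int field_size )
{
#if defined( SMILEI_ACCELERATOR_GPU_OMP )
    #pragma omp target teams distribute parallel for \
        map( to: field[0:field_size] ) \
        map( from: out[first:last-first] ) \
        is_device_ptr( position_x )
#elif defined( SMILEI_ACCELERATOR_GPU_OACC )
    #pragma acc parallel loop gang worker vector \
        present( field[0:field_size], out[first:last-first] ) \
        deviceptr( position_x )
#endif
    for( int ipart = first; ipart < last; ipart++ ) {
        const double xpn = position_x[ipart] * dx_inv;
        const int    ip  = static_cast<int>( xpn + 0.5 ) - i_domain_begin; // nearest node (x >= 0 assumed)
        out[ipart] = field[ip];                                            // placeholder gather
    }
}

When neither macro is defined the pragmas vanish and the function is an ordinary sequential loop, which is exactly how the CPU build of the wrappers behaves.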
delta[1*nparts+ipart] = delta_p[1]; } // end ipart loop - #if defined(SMILEI_OPENACC_MODE) + #if defined(SMILEI_ACCELERATOR_GPU_OACC) #pragma acc exit data delete(this) #endif } // end with B-TIS interpolation diff --git a/src/Interpolator/Interpolator3D2Order.cpp b/src/Interpolator/Interpolator3D2Order.cpp index 9e594f20b..f40239836 100755 --- a/src/Interpolator/Interpolator3D2Order.cpp +++ b/src/Interpolator/Interpolator3D2Order.cpp @@ -185,8 +185,6 @@ void Interpolator3D2Order::fieldsWrapper( ElectroMagn *EMfields, Particles &part int *const __restrict__ iold = smpi->dynamics_iold[ithread].data(); double *const __restrict__ delta = smpi->dynamics_deltaold[ithread].data(); - unsigned int buffer_size = smpi->dynamics_Epart[ithread].size(); - const double *const __restrict__ position_x = particles.getPtrPosition( 0 ); const double *const __restrict__ position_y = particles.getPtrPosition( 1 ); const double *const __restrict__ position_z = particles.getPtrPosition( 2 ); @@ -198,7 +196,7 @@ void Interpolator3D2Order::fieldsWrapper( ElectroMagn *EMfields, Particles &part const double *const __restrict__ By3D = EMfields->By_m->data_; const double *const __restrict__ Bz3D = EMfields->Bz_m->data_; -#if defined(SMILEI_OPENACC_MODE) +#if defined(SMILEI_ACCELERATOR_GPU_OACC) const int sizeofEx = EMfields->Ex_->size(); const int sizeofEy = EMfields->Ey_->size(); const int sizeofEz = EMfields->Ez_->size(); @@ -224,7 +222,7 @@ void Interpolator3D2Order::fieldsWrapper( ElectroMagn *EMfields, Particles &part position_y /* [first_index:npart_range_size] */, \ position_z /* [first_index:npart_range_size] */ ) #pragma omp teams distribute parallel for -#elif defined(SMILEI_OPENACC_MODE) +#elif defined(SMILEI_ACCELERATOR_GPU_OACC) #pragma acc enter data create(this) #pragma acc update device(this) size_t interpolation_range_size = ( last_index + 2 * nparts ) - first_index; @@ -282,7 +280,7 @@ void Interpolator3D2Order::fieldsWrapper( ElectroMagn *EMfields, Particles &part delta[1*nparts+ipart] = delta_p[1]; delta[2*nparts+ipart] = delta_p[2]; } - #if defined(SMILEI_OPENACC_MODE) + #if defined(SMILEI_ACCELERATOR_GPU_OACC) #pragma acc exit data delete(this) #endif } else { // with B-TIS3 interpolation @@ -302,7 +300,7 @@ void Interpolator3D2Order::fieldsWrapper( ElectroMagn *EMfields, Particles &part position_y /* [first_index:npart_range_size] */, \ position_z /* [first_index:npart_range_size] */ ) #pragma omp teams distribute parallel for -#elif defined(SMILEI_OPENACC_MODE) +#elif defined(SMILEI_ACCELERATOR_GPU_OACC) #pragma acc enter data create(this) #pragma acc update device(this) size_t interpolation_range_size = ( last_index + 2 * nparts ) - first_index; @@ -368,7 +366,7 @@ void Interpolator3D2Order::fieldsWrapper( ElectroMagn *EMfields, Particles &part delta[ipart+0*nparts] = delta_p[0]; delta[ipart+1*nparts] = delta_p[1]; delta[ipart+2*nparts] = delta_p[2]; - #if defined(SMILEI_OPENACC_MODE) + #if defined(SMILEI_ACCELERATOR_GPU_OACC) #pragma acc exit data delete(this) #endif } // end ipart loop diff --git a/src/Interpolator/Interpolator3D2Order.h b/src/Interpolator/Interpolator3D2Order.h index 52f0335a0..1fa07438d 100755 --- a/src/Interpolator/Interpolator3D2Order.h +++ b/src/Interpolator/Interpolator3D2Order.h @@ -59,7 +59,7 @@ class Interpolator3D2Order : public Interpolator3D int idx, int idy, int idz, - int nx, + int /*nx*/, int ny, int nz ) { diff --git a/src/Interpolator/InterpolatorFactory.h b/src/Interpolator/InterpolatorFactory.h index f2cbd7c19..37e1042fb 100755 --- 
a/src/Interpolator/InterpolatorFactory.h +++ b/src/Interpolator/InterpolatorFactory.h @@ -48,12 +48,22 @@ class InterpolatorFactory // 1Dcartesian simulation // --------------- if( ( params.geometry == "1Dcartesian" ) && ( params.interpolation_order == 2 ) ) { + if( !vectorization ) { + if ( params.interpolator_ == "momentum-conserving" ) { + Interp = new Interpolator1D2Order( params, patch ); + } + else if ( params.interpolator_ == "wt" ) { + Interp = new Interpolator1DWT2Order( params, patch ); + } + } + else { if ( params.interpolator_ == "momentum-conserving" ) { Interp = new Interpolator1D2OrderV( params, patch ); } else if ( params.interpolator_ == "wt" ) { Interp = new Interpolator1DWT2OrderV( params, patch ); } + } } else if( ( params.geometry == "1Dcartesian" ) && ( params.interpolation_order == 4 ) ) { if( params.interpolator_ == "momentum-conserving" ) { Interp = new Interpolator1D4Order( params, patch ); diff --git a/src/MovWindow/SimWindow.cpp b/src/MovWindow/SimWindow.cpp index 08ffada69..4ee9781c7 100755 --- a/src/MovWindow/SimWindow.cpp +++ b/src/MovWindow/SimWindow.cpp @@ -383,15 +383,10 @@ void SimWindow::shift( VectorPatch &vecPatches, SmileiMPI *smpi, Params ¶ms, } // end loop nSpecies -#if defined ( SMILEI_ACCELERATOR_MODE ) - if ( params.gpu_computing ) { - // ADD NEW PARTS ON GPU - for( unsigned int ispec=0 ; ispecvecSpecies[ispec]->particles_to_move->clear(); - // mypatch->vecSpecies[ispec]->particles->copyParticles( 0, mypatch->vecSpecies[ispec]->getNbrOfParticles(), - // *mypatch->vecSpecies[ispec]->particles_to_move, 0 ); - mypatch->vecSpecies[ispec]->particles->initializeDataOnDevice(); - mypatch->vecSpecies[ispec]->particles_to_move->initializeDataOnDevice(); +#if defined ( SMILEI_ACCELERATOR_GPU ) + if( params.gpu_computing ) { + for( auto spec: mypatch->vecSpecies ) { + spec->allocateParticlesOnDevice(); } } #endif @@ -403,7 +398,7 @@ void SimWindow::shift( VectorPatch &vecPatches, SmileiMPI *smpi, Params ¶ms, } // end test patch_particle_created[ithread][j] -#if defined ( SMILEI_ACCELERATOR_MODE ) +#if defined ( SMILEI_ACCELERATOR_GPU ) // if ( params.gpu_computing ) { // Initializes only field data structures, particle data structure are initialized separately mypatch->allocateAndCopyFieldsOnDevice(); diff --git a/src/MultiphotonBreitWheeler/MultiphotonBreitWheeler.cpp b/src/MultiphotonBreitWheeler/MultiphotonBreitWheeler.cpp index 6f7b9e0df..8136f36ff 100755 --- a/src/MultiphotonBreitWheeler/MultiphotonBreitWheeler.cpp +++ b/src/MultiphotonBreitWheeler/MultiphotonBreitWheeler.cpp @@ -10,7 +10,7 @@ #include "MultiphotonBreitWheeler.h" #include "Species.h" -#if defined(SMILEI_OPENACC_MODE) +#if defined(SMILEI_ACCELERATOR_GPU_OACC) #define __HIP_PLATFORM_NVCC__ #define __HIP_PLATFORM_NVIDIA__ #include "gpuRandom.h" @@ -248,7 +248,7 @@ void MultiphotonBreitWheeler::operator()( Particles &particles, double *const __restrict__ pair1_chi = new_pair[1]->has_quantum_parameter ? new_pair[1]->getPtrChi() : nullptr; double *const __restrict__ pair1_tau = new_pair[1]->has_Monte_Carlo_process ? 
new_pair[1]->getPtrTau() : nullptr; -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC // Parameters for random generator unsigned long long seed; unsigned long long seq; @@ -325,7 +325,7 @@ void MultiphotonBreitWheeler::operator()( Particles &particles, Ex[ipart-ipart_ref], Ey[ipart-ipart_ref], Ez[ipart-ipart_ref], Bx[ipart-ipart_ref], By[ipart-ipart_ref], Bz[ipart-ipart_ref] ); -#ifndef SMILEI_OPENACC_MODE +#ifndef SMILEI_ACCELERATOR_GPU_OACC } @@ -349,7 +349,7 @@ void MultiphotonBreitWheeler::operator()( Particles &particles, while( tau[ipart] <= epsilon_tau_ ) { //tau[ipart] = -log( 1.-Rand::uniform() ); -#ifndef SMILEI_OPENACC_MODE +#ifndef SMILEI_ACCELERATOR_GPU_OACC tau[ipart] = -std::log( 1.-rand_->uniform() ); #else @@ -406,7 +406,7 @@ void MultiphotonBreitWheeler::operator()( Particles &particles, double pair_chi[2]; // Draw random number in [0,1[ -#ifndef SMILEI_OPENACC_MODE +#ifndef SMILEI_ACCELERATOR_GPU_OACC const double random_number = rand_->uniform(); #else seed_curand_2 = (int) (ipart + 1)*(initial_seed_2 + 1); //Seed for linear generator @@ -431,7 +431,7 @@ void MultiphotonBreitWheeler::operator()( Particles &particles, SMILEI_UNUSED( ibin ); // Creation of new electrons in the temporary array new_pair[0] new_pair[0]->createParticles( mBW_pair_creation_sampling_[0] ); -#ifndef SMILEI_OPENACC_MODE +#ifndef SMILEI_ACCELERATOR_GPU_OACC // Final size int nparticles = new_pair[0]->size(); @@ -442,7 +442,7 @@ void MultiphotonBreitWheeler::operator()( Particles &particles, #endif // For all new paticles -#ifndef SMILEI_OPENACC_MODE +#ifndef SMILEI_ACCELERATOR_GPU_OACC #pragma omp simd #endif for( int ipair=i_pair_start; ipair < i_pair_start+mBW_pair_creation_sampling_[0]; ipair++ ) { @@ -466,7 +466,7 @@ void MultiphotonBreitWheeler::operator()( Particles &particles, } // + new_pair[k].momentum(i,ipair)*remaining_dt*inv_gamma; -#ifndef SMILEI_OPENACC_MODE +#ifndef SMILEI_ACCELERATOR_GPU_OACC // Old positions if( particles.keepOldPositions() ) { pair0_position_old_x[ipair]=position_x[ipart] ; @@ -494,7 +494,7 @@ void MultiphotonBreitWheeler::operator()( Particles &particles, // Create particle for the second pair species new_pair[1]->createParticles( mBW_pair_creation_sampling_[1] ); -#ifndef SMILEI_OPENACC_MODE +#ifndef SMILEI_ACCELERATOR_GPU_OACC // Final size nparticles = new_pair[1]->size(); @@ -505,7 +505,7 @@ void MultiphotonBreitWheeler::operator()( Particles &particles, #endif // For all new paticles -#ifndef SMILEI_OPENACC_MODE +#ifndef SMILEI_ACCELERATOR_GPU_OACC #pragma omp simd #endif for( auto ipair=i_pair_start; ipair < i_pair_start + mBW_pair_creation_sampling_[1]; ipair++ ) { @@ -530,7 +530,7 @@ void MultiphotonBreitWheeler::operator()( Particles &particles, } // + new_pair[k].momentum(i,ipair)*remaining_dt*inv_gamma; -#ifndef SMILEI_OPENACC_MODE +#ifndef SMILEI_ACCELERATOR_GPU_OACC // Old positions if( particles.keepOldPositions() ) { pair1_position_old_x[ipair]=position_x[ipart] ; @@ -629,7 +629,7 @@ void MultiphotonBreitWheeler::operator()( Particles &particles, } } // end ipart loop -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC } #endif } @@ -795,7 +795,7 @@ void MultiphotonBreitWheeler::removeDecayedPhotonsWithoutBinCompression( if( ipart < last_photon_index ) { // The last existing photon comes to the position of // the deleted photon -#ifndef SMILEI_OPENACC_MODE +#ifndef SMILEI_ACCELERATOR_GPU_OACC particles.overwriteParticle( last_photon_index, ipart ); #else #endif diff --git 
a/src/MultiphotonBreitWheeler/MultiphotonBreitWheeler.h b/src/MultiphotonBreitWheeler/MultiphotonBreitWheeler.h index 6e14a37f3..71315d79a 100755 --- a/src/MultiphotonBreitWheeler/MultiphotonBreitWheeler.h +++ b/src/MultiphotonBreitWheeler/MultiphotonBreitWheeler.h @@ -115,7 +115,7 @@ class MultiphotonBreitWheeler //! \param bmin Pointer toward the first particle index of the bin in the Particles object //! \param bmax Pointer toward the last particle index of the bin in the Particles object //! \param ithread Thread index -//#ifdef SMILEI_OPENACC_MODE +//#ifdef SMILEI_ACCELERATOR_GPU_OACC // #pragma acc routine seq //#endif void removeDecayedPhotonsWithoutBinCompression( diff --git a/src/MultiphotonBreitWheeler/MultiphotonBreitWheelerTables.h b/src/MultiphotonBreitWheeler/MultiphotonBreitWheelerTables.h index 4f7f1ce72..9bef108b6 100755 --- a/src/MultiphotonBreitWheeler/MultiphotonBreitWheelerTables.h +++ b/src/MultiphotonBreitWheeler/MultiphotonBreitWheelerTables.h @@ -54,7 +54,7 @@ class MultiphotonBreitWheelerTables //! the multiphoton Breit-Wheeler pair creation //! \param photon_chi photon quantum parameter //! \param[out] pair_chi quantum parameters of the pair -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc routine seq #endif void computePairQuantumParameter( const double photon_chi, @@ -71,7 +71,7 @@ class MultiphotonBreitWheelerTables //! \param photon_chi photon quantum parameter //! \param gamma photon normalized energy // ----------------------------------------------------------------------------- -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc routine seq #endif double computeBreitWheelerPairProductionRate( diff --git a/src/Params/Params.cpp b/src/Params/Params.cpp index bc9fb8ed4..69973d104 100755 --- a/src/Params/Params.cpp +++ b/src/Params/Params.cpp @@ -129,16 +129,20 @@ Params::Params( SmileiMPI *smpi, std::vector namelistsFiles ) : PyObject_SetAttrString( Py_main, "_test_mode", Py_False ); PyTools::checkPyError(); - // here we add the rank, in case some script need it + // we add the rank, in case some script needs it PyModule_AddIntConstant( Py_main, "smilei_mpi_rank", smpi->getRank() ); - // here we add the MPI size, in case some script need it + // we add the MPI size, in case some script needs it PyModule_AddIntConstant( Py_main, "smilei_mpi_size", smpi->getSize() ); namelist += string( "smilei_mpi_size = " ) + to_string( smpi->getSize() ) + "\n"; - // here we add the larget int, important to get a valid seed for randomization - PyModule_AddIntConstant( Py_main, "smilei_rand_max", RAND_MAX ); - namelist += string( "smilei_rand_max = " ) + to_string( RAND_MAX ) + "\n\n"; + // we add the openMP size, in case some script needs it + PyModule_AddIntConstant( Py_main, "smilei_omp_threads", smpi->getOMPMaxThreads() ); + namelist += string( "smilei_omp_threads = " ) + to_string( smpi->getOMPMaxThreads() ) + "\n"; + + // we add the total number of cores, in case some script needs it + PyModule_AddIntConstant( Py_main, "smilei_total_cores", smpi->getGlobalNumCores() ); + namelist += string( "smilei_total_cores = " ) + to_string( smpi->getGlobalNumCores() ) + "\n"; // Running pyprofiles.py runScript( string( reinterpret_cast( pyprofiles_py ), pyprofiles_py_len ), "pyprofiles.py", globals ); @@ -833,7 +837,7 @@ Params::Params( SmileiMPI *smpi, std::vector namelistsFiles ) : PyTools::extract( "gpu_computing", gpu_computing, "Main" ); if( gpu_computing ) { -#if( defined( SMILEI_OPENACC_MODE ) && defined( _OPENACC ) ) || 
defined( SMILEI_ACCELERATOR_GPU_OMP ) +#if( defined( SMILEI_ACCELERATOR_GPU_OACC ) && defined( _OPENACC ) ) || defined( SMILEI_ACCELERATOR_GPU_OMP ) // If compiled for GPU and asking for GPU MESSAGE( 1, "Smilei will run on GPU devices" ); #else @@ -1051,27 +1055,26 @@ Params::Params( SmileiMPI *smpi, std::vector namelistsFiles ) : // Extract the list of profiles and verify their content PyObject *p = PyTools::extract_py( "_profiles", "Laser", i_laser ); vector profiles; - vector profiles_n = {1, 2}; if( ! PyTools::py2pyvector( p, profiles ) ) { ERROR_NAMELIST( "For LaserOffset #" << n_laser_offset << ": space_time_profile must be a list of 2 profiles", LINK_NAMELIST + std::string("#lasers") ); } Py_DECREF( p ); - if( profiles.size()!=2 ) { + if( profiles.size() != 2 ) { ERROR_NAMELIST( "For LaserOffset #" << n_laser_offset << ": space_time_profile needs 2 profiles.", LINK_NAMELIST + std::string("#lasers") ); } - if( profiles[1] == Py_None ) { - profiles .pop_back(); - profiles_n.pop_back(); - } - if( profiles[0] == Py_None ) { - profiles .erase( profiles .begin() ); - profiles_n.erase( profiles_n.begin() ); + vector profiles_n; + vector profiles_kept; + for( unsigned int i = 0; i < 2; i++ ) { + if( profiles[i] != Py_None ) { + profiles_kept.push_back( profiles[i] ); + profiles_n.push_back( i + 1 ); + } } - if( profiles.size() == 0 ) { + if( profiles_kept.size() == 0 ) { ERROR_NAMELIST( "For LaserOffset #" << n_laser_offset << ": space_time_profile cannot be [None, None]", LINK_NAMELIST + std::string("#lasers") ); } - for( unsigned int i=0; i namelistsFiles ) : // Make the propagation happen and write out the file if( ! smpi->test_mode ) { - propagateX( profiles, profiles_n, offset, file, keep_n_strongest_modes, angle_z ); + propagateX( profiles_kept, profiles_n, offset, file, keep_n_strongest_modes, angle_z ); } } - + + for( auto p: profiles ) { + Py_DECREF( p ); + } + n_laser_offset ++; } } @@ -1223,7 +1230,7 @@ void Params::compute() // Set cluster_width_ if not set by the user if( cluster_width_ == -1 ) { -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) cluster_width_ = patch_size_[0]; // On GPU, dont do the CPU automatic cluster_width computation, only one // bin is expected. @@ -1272,7 +1279,7 @@ void Params::compute() // Verify that cluster_width_ divides patch_size_[0] or patch_size_[n] in GPU mode -#if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined( SMILEI_ACCELERATOR_GPU_OACC ) const int kClusterWidth = getGPUClusterWidth(); if( kClusterWidth < 0 ) { @@ -1882,7 +1889,7 @@ string Params::speciesField( string field_name ) return ""; } -#if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined( SMILEI_ACCELERATOR_GPU_OACC ) bool Params::isGPUParticleBinningAvailable() const { @@ -1899,7 +1906,7 @@ bool Params::isGPUParticleBinningAvailable() const #endif -#if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined( SMILEI_ACCELERATOR_GPU_OACC ) int Params::getGPUClusterWidth() const { diff --git a/src/Params/Params.h b/src/Params/Params.h index e2b0603e6..f22dec0cb 100755 --- a/src/Params/Params.h +++ b/src/Params/Params.h @@ -386,7 +386,7 @@ class Params //! 
bool isGPUParticleBinningAvailable() const; -#if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined( SMILEI_ACCELERATOR_GPU_OACC ) //! Given dimension_id in [0, 3), return for dimension_id == : //! 1: the 1D value (not implemented) @@ -407,7 +407,7 @@ class Params //#if defined( SMILEI_ACCELERATOR_GPU_OMP ) switch( dimension_id ) { case 1: - return -1; + return 4; // check for optimal value case 2: return 4; case 3: diff --git a/src/ParticleBC/BoundaryConditionType.cpp b/src/ParticleBC/BoundaryConditionType.cpp index 318b6b289..304656eca 100755 --- a/src/ParticleBC/BoundaryConditionType.cpp +++ b/src/ParticleBC/BoundaryConditionType.cpp @@ -18,7 +18,7 @@ void internal_inf( Species *species, int imin, int imax, int direction, double l energy_change = 0.; // no energy loss during exchange const double* const position = species->particles->getPtrPosition( direction ); int* const cell_keys = species->particles->getPtrCellKeys(); -#if defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel deviceptr(position,cell_keys) #pragma acc loop gang worker vector #elif defined( SMILEI_ACCELERATOR_GPU_OMP ) @@ -28,9 +28,9 @@ void internal_inf( Species *species, int imin, int imax, int direction, double l cell_keys /* [imin:imax - imin] */ ) #pragma omp teams distribute parallel for #endif - for (int ipart=imin ; ipart= 0 && position[ ipart ] < limit_inf ) { + cell_keys[ ipart ] = -2 - 2 * direction; } } } @@ -40,7 +40,7 @@ void internal_sup( Species *species, int imin, int imax, int direction, double l energy_change = 0.; // no energy loss during exchange const double* const position = species->particles->getPtrPosition( direction ); int* const cell_keys = species->particles->getPtrCellKeys(); -#if defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel deviceptr(position,cell_keys) #pragma acc loop gang worker vector #elif defined( SMILEI_ACCELERATOR_GPU_OMP ) @@ -50,9 +50,9 @@ void internal_sup( Species *species, int imin, int imax, int direction, double l cell_keys /* [imin:imax - imin] */ ) #pragma omp teams distribute parallel for #endif - for (int ipart=imin ; ipart= limit_sup) { - cell_keys[ ipart ] = -1; + for( int ipart=imin ; ipart= 0 && position[ ipart ] >= limit_sup ) { + cell_keys[ ipart ] = -3 - 2 * direction; } } } @@ -63,10 +63,11 @@ void internal_inf_AM( Species *species, int imin, int imax, int /*direction*/, d double* position_y = species->particles->getPtrPosition(1); double* position_z = species->particles->getPtrPosition(2); int* cell_keys = species->particles->getPtrCellKeys(); - for (int ipart=imin ; ipart= 0 && distance2ToAxis < limit_inf2 ) { + cell_keys[ ipart ] = -4; } } } @@ -77,10 +78,11 @@ void internal_sup_AM( Species *species, int imin, int imax, int /*direction*/, d double* position_y = species->particles->getPtrPosition(1); double* position_z = species->particles->getPtrPosition(2); int* cell_keys = species->particles->getPtrCellKeys(); - for (int ipart=imin ; ipart= limit_sup*limit_sup ) { - cell_keys[ ipart ] = -1; + if( cell_keys[ ipart ] >= 0 && distance2ToAxis >= limit_sup2 ) { + cell_keys[ ipart ] = -5; } } } @@ -90,15 +92,15 @@ void reflect_particle_inf( Species *species, int imin, int imax, int direction, energy_change = 0.; // no energy loss during reflection double* position = species->particles->getPtrPosition(direction); double* momentum = species->particles->getPtrMomentum(direction); -#ifdef 
SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc parallel deviceptr(position,momentum) #pragma acc loop gang worker vector #elif defined( SMILEI_ACCELERATOR_GPU_OMP ) #pragma omp target is_device_ptr( position, momentum ) #pragma omp teams distribute parallel for #endif - for (int ipart=imin ; ipartparticles->getPtrPosition(direction); double* momentum = species->particles->getPtrMomentum(direction); -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc parallel deviceptr(position,momentum) #pragma acc loop gang worker vector #elif defined( SMILEI_ACCELERATOR_GPU_OMP ) @@ -187,9 +189,9 @@ void remove_particle_inf( Species* species, int imin, int imax, int direction, double limit_inf, - double dt, - std::vector& invgf, - Random* rand, + double /*dt*/, + std::vector& /*invgf*/, + Random* /*rand*/, double& energy_change ) { @@ -208,7 +210,7 @@ void remove_particle_inf( Species* species, : change_in_energy ) #pragma omp teams distribute parallel for reduction( + \ : change_in_energy ) -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel deviceptr(position,momentum_x,momentum_y,momentum_z,weight,charge,cell_keys) #pragma acc loop gang worker vector reduction(+ : change_in_energy) #else @@ -233,9 +235,9 @@ void remove_particle_sup( Species* species, int imin, int imax, int direction, double limit_sup, - double dt, - std::vector& invgf, - Random* rand, + double /*dt*/, + std::vector& /*invgf*/, + Random* /*rand*/, double& energy_change ) { @@ -254,7 +256,7 @@ void remove_particle_sup( Species* species, : change_in_energy ) #pragma omp teams distribute parallel for reduction( + \ : change_in_energy ) -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel deviceptr(position,momentum_x,momentum_y,momentum_z,weight,charge,cell_keys) #pragma acc loop gang worker vector reduction(+ : change_in_energy) #else diff --git a/src/ParticleBC/PartBoundCond.h b/src/ParticleBC/PartBoundCond.h index 47ab7e235..7afd6ca9c 100755 --- a/src/ParticleBC/PartBoundCond.h +++ b/src/ParticleBC/PartBoundCond.h @@ -44,7 +44,7 @@ class PartBoundCond } else { int *const cell_keys = species->particles->getPtrCellKeys(); -#if defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel deviceptr( cell_keys ) #pragma acc loop gang worker vector #elif defined( SMILEI_ACCELERATOR_GPU_OMP ) diff --git a/src/Particles/Particles.cpp b/src/Particles/Particles.cpp index 308ee4319..dae056745 100755 --- a/src/Particles/Particles.cpp +++ b/src/Particles/Particles.cpp @@ -413,6 +413,51 @@ void Particles::copyParticles( unsigned int iPart, unsigned int nPart, Particles } } +// --------------------------------------------------------------------------------------------------------------------- +//! Copy particles indexed by array 'indices' to dest_id in dest_parts +//! The array 'indices' must be sorted in increasing order +//! 
cell keys not affected +// --------------------------------------------------------------------------------------------------------------------- +void Particles::copyParticles( vector indices, Particles &dest_parts, int dest_id ) +{ + const size_t transfer_size = indices.size(); + const size_t dest_new_size = dest_parts.size() + transfer_size; + const size_t displaced_size = dest_parts.size() - dest_id; + + for( unsigned int iprop=0 ; ipropresize( dest_new_size ); + auto loc = dest_parts.double_prop_[iprop]->begin() + dest_id; + move_backward( loc, loc + displaced_size, dest_parts.double_prop_[iprop]->end() ); + // Copy data + for( size_t i = 0; i < transfer_size; i++ ) { + ( *dest_parts.double_prop_[iprop] )[dest_id+i] = ( *double_prop_[iprop] )[indices[i]]; + } + } + + for( unsigned int iprop=0 ; ipropresize( dest_new_size ); + auto loc = dest_parts.short_prop_[iprop]->begin() + dest_id; + move_backward( loc, loc + displaced_size, dest_parts.short_prop_[iprop]->end() ); + // Copy data + for( size_t i = 0; i < transfer_size; i++ ) { + ( *dest_parts.short_prop_[iprop] )[dest_id+i] = ( *short_prop_[iprop] )[indices[i]]; + } + } + + for( unsigned int iprop=0 ; ipropresize( dest_new_size ); + auto loc = dest_parts.uint64_prop_[iprop]->begin() + dest_id; + move_backward( loc, loc + displaced_size, dest_parts.uint64_prop_[iprop]->end() ); + // Copy data + for( size_t i = 0; i < transfer_size; i++ ) { + ( *dest_parts.uint64_prop_[iprop] )[dest_id+i] = ( *uint64_prop_[iprop] )[indices[i]]; + } + } +} + // --------------------------------------------------------------------------------------------------------------------- //! Make a new particle at the position of another //! cell keys not affected @@ -529,6 +574,70 @@ void Particles::eraseParticle( unsigned int ipart, unsigned int npart, bool comp } + +// --------------------------------------------------------------------------------------------------------------------- +//! Erase particles indexed by array 'indices' to dest_id in dest_parts +//! The array 'indices' must be sorted in increasing order +//! cell keys not affected +// --------------------------------------------------------------------------------------------------------------------- +void Particles::eraseParticles( vector indices ) +{ + const size_t indices_size = indices.size(); + const size_t initial_size = size(); + + if( indices_size > 0 ) { + + for( auto prop : double_prop_ ) { + // Relocate data to fill erased space + size_t j = 1, stop = ( 1 == indices_size ) ? initial_size : indices[1], to = indices[0]; + for( size_t from = indices[0]+1; from < initial_size; from++ ) { + if( from < stop ) { + ( *prop )[to] = ( *prop )[from]; + to++; + } else { + j++; + stop = ( j == indices_size ) ? initial_size : indices[j]; + } + } + // Resize + prop->resize( initial_size - indices_size ); + } + + for( auto prop : short_prop_ ) { + // Relocate data to fill erased space + size_t j = 1, stop = ( 1 == indices_size ) ? initial_size : indices[1], to = indices[0]; + for( size_t from = indices[0]+1; from < initial_size; from++ ) { + if( from < stop ) { + ( *prop )[to] = ( *prop )[from]; + to++; + } else { + j++; + stop = ( j == indices_size ) ? initial_size : indices[j]; + } + } + // Resize + prop->resize( initial_size - indices_size ); + } + + for( auto prop : uint64_prop_ ) { + // Relocate data to fill erased space + size_t j = 1, stop = ( 1 == indices_size ) ? 
initial_size : indices[1], to = indices[0]; + for( size_t from = indices[0]+1; from < initial_size; from++ ) { + if( from < stop ) { + ( *prop )[to] = ( *prop )[from]; + to++; + } else { + j++; + stop = ( j == indices_size ) ? initial_size : indices[j]; + } + } + // Resize + prop->resize( initial_size - indices_size ); + } + + } +} + // --------------------------------------------------------------------------------------------------------------------- // Print parameters of particle iPart // --------------------------------------------------------------------------------------------------------------------- @@ -1190,21 +1299,61 @@ void Particles::copyFromHostToDevice() { ERROR( "Device only feature, should not have come here!" ); } -void Particles::copyFromDeviceToHost() +void Particles::copyFromDeviceToHost( bool ) { ERROR( "Device only feature, should not have come here!" ); } -void Particles::extractParticles( Particles* particles_to_move ) +// Loop all particles and copy the outgoing ones to buffers +void Particles::copyLeavingParticlesToBuffers( const vector copy, const vector buffer ) { - particles_to_move->clear(); - for ( int ipart=0 ; ipart indices; + for( size_t ipart = 0; ipart < buffer[0]->size(); ipart++ ) { + int direction = -buffer[0]->cell_keys[ipart] - 2; + if( direction > 0 ) { + if( copy[direction] ) { + buffer[0]->copyParticle( ipart, *buffer[direction] ); + } + indices.push_back( ipart ); } } + buffer[0]->eraseParticles( indices ); + +#else + + // CPU + + for( size_t ipart = 0; ipart < size(); ipart++ ) { + if( cell_keys[ipart] < -1 ) { + int direction = -cell_keys[ipart] - 2; + if( copy[direction] ) { + copyParticle( ipart, *buffer[direction] ); + } + } + } + +#endif } +void Particles::copyLeavingParticlesToBuffer( Particles* ) +{ + ERROR( "Device only feature, should not have come here!" ); +} + + void Particles::savePositions() { unsigned int ndim = Position.size(), npart = size(); double *p[3], *pold[3]; @@ -1249,13 +1398,12 @@ int Particles::eraseLeavingParticles() return 0; } -int Particles::injectParticles( Particles *particles_to_inject ) +int Particles::addParticles( Particles* particles_to_inject ) { ERROR( "Device only feature, should not have come here! On CPU it's done in sortParticles." ); - return 0; } -void Particles::importAndSortParticles( Particles *particles_to_inject ) +void Particles::importAndSortParticles( Particles */*particles_to_inject*/ ) { ERROR( "Device only feature, should not have come here! On CPU it's done in sortParticles." ); } diff --git a/src/Particles/Particles.h b/src/Particles/Particles.h index 1f67ab1cc..20b9c2ea6 100755 --- a/src/Particles/Particles.h +++ b/src/Particles/Particles.h @@ -143,6 +143,8 @@ class Particles //! Insert nPart particles starting at ipart to dest_id in dest_parts void copyParticles( unsigned int iPart, unsigned int nPart, Particles &dest_parts, int dest_id ); + //! Transfer particles indexed by array indices to dest_id in dest_parts + void copyParticles( std::vector indices, Particles &dest_parts, int dest_id ); //! Make a new particle at the position of another void makeParticleAt( Particles &source_particles, unsigned int ipart, double w, short q=0., double px=0., double py=0., double pz=0. ); @@ -151,6 +153,8 @@ class Particles void eraseParticle( unsigned int iPart, bool compute_cell_keys = false ); //! Suppress nPart particles from iPart void eraseParticle( unsigned int iPart, unsigned int nPart, bool compute_cell_keys = false ); + //! 
Suppress indexed particles + void eraseParticles( std::vector indices ); //! Suppress all particles from iPart to the end of particle array void eraseParticleTrail( unsigned int iPart, bool compute_cell_keys = false ); @@ -431,7 +435,7 @@ class Particles virtual void initializeDataOnDevice(); virtual void initializeIDsOnDevice(); virtual void copyFromHostToDevice(); - virtual void copyFromDeviceToHost(); + virtual void copyFromDeviceToHost( bool copy_keys = false ); //! Return the pointer toward the Position[idim] vector virtual double* getPtrPosition( int idim ) { @@ -469,10 +473,10 @@ class Particles // Accelerator specific virtual functions // ----------------------------------------------------------------------------- - //! Extract particles from the Particles object and put - //! them in the Particles object `particles_to_move` + //! Extract particles leaving the box to buffers // ----------------------------------------------------------------------------- - virtual void extractParticles( Particles *particles_to_move ); + void copyLeavingParticlesToBuffers( const std::vector copy, const std::vector buffer ); + virtual void copyLeavingParticlesToBuffer( Particles* buffer ); // ----------------------------------------------------------------------------- //! Erase particles leaving the patch object on device @@ -480,11 +484,9 @@ class Particles virtual int eraseLeavingParticles(); // ----------------------------------------------------------------------------- - //! Inject particles from particles_to_move object and put - //! them in the Particles object - //! \param[in,out] particles_to_inject Particles object containing particles to inject - virtual int injectParticles( Particles *particles_to_inject ); - + //! Resize & Copy particles from particles_to_inject to the end of the vectors + virtual int addParticles( Particles* particles_to_inject ); + //! Implementation of a somewhat efficient particle injection, sorting //! (including removing leaving particles) and binning for GPU if //! available for the configuration of offloading technology diff --git a/src/Particles/ParticlesFactory.cpp b/src/Particles/ParticlesFactory.cpp index 00f51bbb0..34e9a3a83 100755 --- a/src/Particles/ParticlesFactory.cpp +++ b/src/Particles/ParticlesFactory.cpp @@ -7,7 +7,7 @@ // ----------------------------------------------------------------------------- #include "ParticlesFactory.h" -#if defined( SMILEI_OPENACC_MODE ) || defined( SMILEI_ACCELERATOR_GPU_OMP ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) || defined( SMILEI_ACCELERATOR_GPU_OMP ) extern "C" void* CreateGPUParticles( const void* parameters, const void* a_parent_patch ); #endif @@ -22,7 +22,7 @@ Particles* ParticlesFactory::create( const Params& parameters, // We export a C interface to avoid potential ABI problems // that could occur when using two different compilers (e.g., one to // compile cuda/hip and another one for the host code). 
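// Illustrative sketch only (not part of this patch): on the CUDA/HIP side, the
// extern "C" entry point declared above can simply forward to the device
// particle class constructor, keeping the device type out of the host headers:
//
//   extern "C" void* CreateGPUParticles( const void* parameters, const void* a_parent_patch )
//   {
//       return new nvidiaParticles( *static_cast<const Params*>( parameters ),
//                                   *static_cast<const Patch*>( a_parent_patch ) );
//   }
//
// The real definition lives in the GPU translation unit and may differ in detail.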
-#if defined( SMILEI_OPENACC_MODE ) || defined( SMILEI_ACCELERATOR_GPU_OMP ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) || defined( SMILEI_ACCELERATOR_GPU_OMP ) particles = static_cast( CreateGPUParticles( ¶meters, &a_parent_patch ) ); #else SMILEI_UNUSED( a_parent_patch ); diff --git a/src/Particles/nvidiaParticles.cu b/src/Particles/nvidiaParticles.cu old mode 100644 new mode 100755 index d7a63f0b3..a307455ea --- a/src/Particles/nvidiaParticles.cu +++ b/src/Particles/nvidiaParticles.cu @@ -15,6 +15,8 @@ #include #include #include +#include +#include #include "Patch.h" @@ -33,14 +35,25 @@ // Cell key manipulation functor definition //////////////////////////////////////////////////////////////////////////////// -//! Structure with specific function count_if_out for thrust::tuple operator -//! Return True if the entry is -1 as in the cell keys vector for instance -struct count_if_out +//! Predicate for cell_keys +//! Return True if the entry is equal to `code` +template +struct cellKeyEquals { constexpr __host__ __device__ bool operator()( const int& x ) const { - return x == -1; + return x == code; + } +}; + +template +struct cellKeyBelow +{ + constexpr __host__ __device__ bool + operator()( const int& x ) const + { + return x < key; } }; @@ -71,12 +84,6 @@ namespace detail { const Params& parameters, const Patch& a_parent_patch ); - //! Sort the particle on GPU by their cluster/cell key. - //! - static inline void - sortParticleByKey( nvidiaParticles& particle_container, - const Params& parameters ); - //! precondition: //! - nvidia_cell_keys_ shall be sorted in non decreasing order //! - last_index.data() is a pointer mapped to GPU via @@ -107,36 +114,45 @@ namespace detail { InputIterator last, ClusterType cluster_type ); - template - static void - doSortParticleByKey( RandomAccessIterator0 key_first, - RandomAccessIterator0 key_last, - RandomAccessIterator1 value_first ); + }; - template + template + struct Cluster1D : public Cluster + { + public: + Cluster1D( double inverse_x_cell_dimension, + SizeType local_x_dimension_in_cell, + int CellStartingGlobalIndex_for_x); + + //! Compute the cell key of a_particle. a_particle shall be a tuple (from a + //! zipiterator). + //! The first value of a_particle is the cell key value, the other values are + //! the positions x + template + __host__ __device__ IDType + Index( const Tuple& a_particle ) const; + + //! Compute the cell key of a particle range. + //! static void - doImportAndSortParticles( nvidiaParticles& particle_container, - nvidiaParticles& particle_to_inject, - ClusterType cluster_type, - ParticleIteratorProvider particle_iterator_provider, - ParticleNoKeyIteratorProvider particle_no_key_iterator_provider ); - }; + computeParticleClusterKey( nvidiaParticles& particle_container, + const Params& parameters, + const Patch& a_parent_patch ); + double inverse_of_x_cell_dimension_; + int CellStartingGlobalIndex_for_x_; + }; template struct Cluster2D : public Cluster { - public: public: Cluster2D( double inverse_x_cell_dimension, double inverse_y_cell_dimension, SizeType local_x_dimension_in_cell, SizeType local_y_dimension_in_cell, - int CellStartingGlobalIndex_for_x, - int CellStartingGlobalIndex_for_y); + int CellStartingGlobalIndex_for_x, + int CellStartingGlobalIndex_for_y); //! Compute the cell key of a_particle. a_particle shall be a tuple (from a //! zipiterator). 
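For reference, the new Cluster1D follows the same key scheme as the existing 2D/3D variants, reduced to a single coordinate. A minimal sketch of the arithmetic performed by Cluster1D::Index(), assuming the cluster width of 4 cells that the revised Params::getGPUClusterWidth() returns for the 1D case (names and the helper below are illustrative, not part of the patch):

    // Toy reproduction of the 1D cluster key; x is the particle position,
    // inv_dx = parameters.res_space[0], cell_start_x = CellStartingGlobalIndex_for_x_.
    inline int cluster1DKey( double x, double inv_dx, int cell_start_x, int cluster_width = 4 )
    {
        const int local_cell = static_cast<int>( x * inv_dx ) - cell_start_x; // cell index inside the patch
        return local_cell / cluster_width;                                     // integer division -> cluster index along x
    }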
@@ -154,28 +170,17 @@ namespace detail { const Params& parameters, const Patch& a_parent_patch ); - static void - sortParticleByKey( nvidiaParticles& particle_container, - const Params& parameters ); - - static void - importAndSortParticles( nvidiaParticles& particle_container, - nvidiaParticles& particle_to_inject, - const Params& parameters, - const Patch& a_parent_patch ); - public: double inverse_of_x_cell_dimension_; double inverse_of_y_cell_dimension_; SizeType local_y_dimension_in_cluster_; - int CellStartingGlobalIndex_for_x_; + int CellStartingGlobalIndex_for_x_; int CellStartingGlobalIndex_for_y_; }; template struct Cluster3D : public Cluster { - public: public: Cluster3D( double inverse_x_cell_dimension, double inverse_y_cell_dimension, @@ -183,7 +188,7 @@ namespace detail { SizeType local_x_dimension_in_cell, SizeType local_y_dimension_in_cell, SizeType local_z_dimension_in_cell, - int CellStartingGlobalIndex_for_x, + int CellStartingGlobalIndex_for_x, int CellStartingGlobalIndex_for_y, int CellStartingGlobalIndex_for_z); @@ -203,16 +208,6 @@ namespace detail { const Params& parameters, const Patch& a_parent_patch ); - static void - sortParticleByKey( nvidiaParticles& particle_container, - const Params& parameters ); - - static void - importAndSortParticles( nvidiaParticles& particle_container, - nvidiaParticles& particle_to_inject, - const Params& parameters, - const Patch& a_parent_patch ); - public: double inverse_of_x_cell_dimension_; double inverse_of_y_cell_dimension_; @@ -220,7 +215,7 @@ namespace detail { SizeType local_y_dimension_in_cluster_; SizeType local_z_dimension_in_cluster_; int CellStartingGlobalIndex_for_x_; - int CellStartingGlobalIndex_for_y_; + int CellStartingGlobalIndex_for_y_; int CellStartingGlobalIndex_for_z_; }; @@ -230,46 +225,17 @@ namespace detail { template class AssignClusterIndex { - public: public: AssignClusterIndex( ClusterType cluster_type ) : cluster_type_{ cluster_type } { - // EMPTY } template __host__ __device__ void operator()( Tuple& a_particle ) const { - thrust::get<0>( a_particle ) /* cluster key */ = cluster_type_.Index( a_particle ); - } - - protected: - ClusterType cluster_type_; - }; - - - //! This functor assign a cluster key to a_particle. - //! - template - struct OutOfClusterPredicate - { - public: - public: - OutOfClusterPredicate( ClusterType cluster_type ) - : cluster_type_{ cluster_type } - { - // EMPTY - } - - template - __host__ __device__ bool - operator()( const Tuple& a_particle ) const - { - // NOTE: its ub to set the cluster key to wrongly keyed particles - // now.. - return thrust::get<0>( a_particle ) /* cluster key */ != cluster_type_.Index( a_particle ); + thrust::get<0>( a_particle ) = cluster_type_.Index( a_particle ); //cluster key } protected: @@ -277,20 +243,6 @@ namespace detail { }; - //! If the particle's cell/cluster key is -1 it means that it needs to be - //! evicted. - //! - struct OutOfBoundaryPredicate - { - template - __host__ __device__ bool - operator()( const Tuple& a_particle ) const - { - return thrust::get<0>( a_particle ) /* cluster key */ == -1; - } - }; - - //////////////////////////////////////////////////////////////////////////////// // Cluster manipulation functor method definitions //////////////////////////////////////////////////////////////////////////////// @@ -304,6 +256,12 @@ namespace detail { // dimensions. 
switch( particle_container.dimension() ) { + case 1: { + Cluster1D::computeParticleClusterKey( particle_container, + parameters, + a_parent_patch ); + break; + } case 2: { Cluster2D::computeParticleClusterKey( particle_container, parameters, @@ -317,32 +275,7 @@ namespace detail { break; } default: - // Not implemented, only Cartesian 2D or 3D for the moment - SMILEI_ASSERT( false ); - break; - } - } - - inline void - Cluster::sortParticleByKey( nvidiaParticles& particle_container, - const Params& parameters ) - { - // This is where we do a runtime dispatch depending on the simulation's - // dimensions. - - switch( particle_container.dimension() ) { - case 2: { - Cluster2D::sortParticleByKey( particle_container, - parameters ); - break; - } - case 3: { - Cluster3D::sortParticleByKey( particle_container, - parameters ); - break; - } - default: - // Not implemented, only Cartesian 2D or 3D for the moment + // Not implemented, only Cartesian 1D, 2D or 3D for the moment SMILEI_ASSERT( false ); break; } @@ -388,30 +321,46 @@ namespace detail { const Params& parameters, const Patch& a_parent_patch ) { - // This is where we do a runtime dispatch depending on the simulation's - // dimensions. + const auto initial_count = particle_container.deviceSize(); + const auto inject_count = particle_to_inject.deviceSize(); - switch( particle_container.dimension() ) { - case 2: { - Cluster2D::importAndSortParticles( particle_container, - particle_to_inject, - parameters, - a_parent_patch ); - break; - } - case 3: { - Cluster3D::importAndSortParticles( particle_container, - particle_to_inject, - parameters, - a_parent_patch ); - break; - } - - default: - // Not implemented, only 2D for the moment - SMILEI_ASSERT( false ); - break; + // Locate out-of-bounds particles in array "available_places" + const auto keys = particle_container.getPtrCellKeys(); + const auto erased_count = thrust::count_if( thrust::device, keys, keys + initial_count, cellKeyBelow<0>() ); + thrust::device_vector available_places( erased_count ); + thrust::copy_if( thrust::device, + thrust::counting_iterator{0}, + thrust::counting_iterator{ (int) initial_count }, + keys, + available_places.begin(), + cellKeyBelow<0>() ); + + const auto new_count = initial_count + inject_count - erased_count; + + // Copy the imported particles to available places + particle_to_inject.scatterParticles( particle_container, available_places ); + // If there are more imported particles than places, copy the remaining imported particles at the end + if( inject_count >= erased_count ) { + particle_container.deviceResize( new_count ); + particle_container.pasteParticles( &particle_to_inject, initial_count, erased_count ); + // If there are more places than imported particles, the remaining places should be filled + } else { + const auto last_filled = available_places[inject_count]; + particle_container.eraseParticlesByPredicate( cellKeyBelow<0>(), last_filled ); + particle_container.deviceResize( new_count ); } + + // Compute keys of particles + computeParticleClusterKey( particle_container, parameters, a_parent_patch ); + + // Sort particles by keys + // using particle_to_inject as a buffer (it is swapped with particle_container after sorting) + particle_to_inject.deviceReserve( new_count ); // reserve a bit more memory for the final arrays + particle_to_inject.deviceResize( new_count ); + particle_container.sortParticleByKey( particle_to_inject ); + + // Recompute bin locations + computeBinIndex( particle_container ); } template { cluster_type } ); } - template - 
void - Cluster::doSortParticleByKey( RandomAccessIterator0 key_first, - RandomAccessIterator0 key_last, - RandomAccessIterator1 value_first ) - { - thrust::sort_by_key( thrust::device, - key_first, key_last, - value_first ); - } + //////////////////////////////////////////////////////////////////////////////// + // Cluster method definitions + //////////////////////////////////////////////////////////////////////////////// - template - void - Cluster::doImportAndSortParticles( nvidiaParticles& particle_container, - nvidiaParticles& particle_to_inject, - ClusterType cluster_type, - ParticleIteratorProvider particle_iterator_provider, - ParticleNoKeyIteratorProvider particle_no_key_iterator_provider ) + template + Cluster1D::Cluster1D( double inverse_x_cell_dimension, + SizeType local_x_dimension_in_cell, + int CellStartingGlobalIndex_for_x) + : inverse_of_x_cell_dimension_{ inverse_x_cell_dimension } + , CellStartingGlobalIndex_for_x_{CellStartingGlobalIndex_for_x} { - const auto first_particle = particle_iterator_provider( particle_container ); - - auto last_particle = first_particle + - particle_container.deviceSize(); // Obviously, we use half open ranges - - // Remove out of bound particles - // Using more memory, we could use the faster remove_copy_if - // NOTE: remove_if is stable. - last_particle = thrust::remove_if( thrust::device, - first_particle, - last_particle, - OutOfBoundaryPredicate{} ); - - // Idea 1: - remove_copy_if instead of copy_if - // - sort(the_particles_to_inject) - // - merge - // - compute bins - // NOTE: This method consumes a lot of memory ! O(N) - - const auto new_particle_to_inject_count = particle_to_inject.deviceSize(); - const auto current_local_particles_count = std::distance( first_particle, last_particle ); - const auto new_particle_count = new_particle_to_inject_count + current_local_particles_count; - - // NOTE: We really want a non-initializing vector here! - // It's possible to give a custom allocator to thrust::device_vector. - // Create one with construct(<>) as a noop and derive from - // thrust::device_malloc_allocator. For now we do an explicit resize. - particle_to_inject.softReserve( new_particle_count ); - particle_to_inject.resize( new_particle_count ); // We probably invalidated the iterators - - // Copy out of cluster/tile/chunk particles - // partition_copy is way slower than copy_if/remove_copy_if on rocthrust - // https://github.com/ROCmSoftwarePlatform/rocThrust/issues/247 - - const auto first_particle_to_inject = particle_iterator_provider( particle_to_inject ); - - // NOTE: copy_if/remove_copy_if are stable. - const auto partitioned_particles_bounds_true = thrust::copy_if( thrust::device, - first_particle, last_particle, - // Dont overwrite the particle_to_inject (at the start of the array) - first_particle_to_inject + new_particle_to_inject_count, - OutOfClusterPredicate{ cluster_type } ); - const auto partitioned_particles_bounds_false = thrust::remove_copy_if( thrust::device, - first_particle, last_particle, - // Do the copy with a destination - // starting from partitioned_particles_bounds_true - partitioned_particles_bounds_true, - OutOfClusterPredicate{ cluster_type } ); - - // Compute or recompute the cluster index of the particle_to_inject - // NOTE: - // - we can "save" some work here if cluster index is already computed - // for the new particles to inject (not the one we got with copy_if). 
- // - doComputeParticleClusterKey( first_particle_to_inject, - partitioned_particles_bounds_true, - cluster_type ); - - const auto first_particle_to_inject_no_key = particle_no_key_iterator_provider( particle_to_inject ); - const auto particle_to_rekey_count = std::distance( first_particle_to_inject, - partitioned_particles_bounds_true ); - - doSortParticleByKey( particle_to_inject.getPtrCellKeys(), - particle_to_inject.getPtrCellKeys() + particle_to_rekey_count, - first_particle_to_inject_no_key ); - - // This free generates a lot of memory fragmentation. - // particle_container.free(); - // Same as for particle_to_inject, non-initializing vector is best. - particle_container.softReserve( new_particle_count ); - particle_container.resize( new_particle_count ); - - // Merge by key - // NOTE: Dont merge in place on GPU. That means we need an other large buffer! - // - thrust::merge_by_key( thrust::device, - particle_to_inject.getPtrCellKeys(), // Input range 1, first key - particle_to_inject.getPtrCellKeys() + particle_to_rekey_count, // Input range 1, last key - particle_to_inject.getPtrCellKeys() + particle_to_rekey_count, // Input range 2, first key - particle_to_inject.getPtrCellKeys() + new_particle_count, // Input range 2, last key - first_particle_to_inject_no_key, // Input range 1, first value - first_particle_to_inject_no_key + particle_to_rekey_count, // Input range 2, first value - particle_container.getPtrCellKeys(), // Output range first key - particle_no_key_iterator_provider( particle_container ) ); // Output range first value - - // Recompute bins - computeBinIndex( particle_container ); - - // This free generates a lot of memory fragmentation. If we enable it we - // reduce significantly the memory usage over time but a memory spike - // will still be present. Unfortunately, this free generates soo much - // fragmentation (like the one above) that at some point the GPU memory - // allocator will fail! 
- // particle_to_inject.free(); } - - //////////////////////////////////////////////////////////////////////////////// - // Cluster2D method definitions - //////////////////////////////////////////////////////////////////////////////// - template Cluster2D::Cluster2D( double inverse_x_cell_dimension, double inverse_y_cell_dimension, SizeType local_x_dimension_in_cell, SizeType local_y_dimension_in_cell, - int CellStartingGlobalIndex_for_x, int CellStartingGlobalIndex_for_y ) + int CellStartingGlobalIndex_for_x, int CellStartingGlobalIndex_for_y ) : inverse_of_x_cell_dimension_{ inverse_x_cell_dimension } , inverse_of_y_cell_dimension_{ inverse_y_cell_dimension } , local_y_dimension_in_cluster_{ local_y_dimension_in_cell / kClusterWidth } , CellStartingGlobalIndex_for_x_{CellStartingGlobalIndex_for_x} - , CellStartingGlobalIndex_for_y_{CellStartingGlobalIndex_for_y} + , CellStartingGlobalIndex_for_y_{CellStartingGlobalIndex_for_y} { - // EMPTY } template @@ -571,7 +409,7 @@ namespace detail { SizeType local_x_dimension_in_cell, SizeType local_y_dimension_in_cell, SizeType local_z_dimension_in_cell, - int CellStartingGlobalIndex_for_x, + int CellStartingGlobalIndex_for_x, int CellStartingGlobalIndex_for_y, int CellStartingGlobalIndex_for_z ) : inverse_of_x_cell_dimension_{ inverse_x_cell_dimension } , inverse_of_y_cell_dimension_{ inverse_y_cell_dimension } @@ -582,7 +420,30 @@ namespace detail { , CellStartingGlobalIndex_for_y_{CellStartingGlobalIndex_for_y} , CellStartingGlobalIndex_for_z_{CellStartingGlobalIndex_for_z} { - // EMPTY + } + + template + template + __host__ __device__ typename Cluster1D::IDType + Cluster1D::Index( const Tuple& a_particle ) const + { + const SizeType local_x_particle_coordinate_in_cell = static_cast( thrust::get<1>( a_particle ) * + inverse_of_x_cell_dimension_ ) - + CellStartingGlobalIndex_for_x_; + + // These divisions will be optimized. + // The integer division rounding behavior is expected. + + // NOTE: Flat tiles have been studied but were not as efficient for the + // projection. The square provides the minimal perimeter (and thus ghost + // cell amount) for a given area. 
+ static constexpr SizeType x_cluster_dimension_in_cell = kClusterWidth; + + const SizeType local_x_particle_cluster_coordinate_in_cluster = local_x_particle_coordinate_in_cell / x_cluster_dimension_in_cell; + + const SizeType cluster_index = local_x_particle_cluster_coordinate_in_cluster; + + return static_cast( cluster_index ); } template @@ -658,6 +519,22 @@ namespace detail { return static_cast( cluster_index ); } + template + void + Cluster1D::computeParticleClusterKey( nvidiaParticles& particle_container, + const Params& parameters, + const Patch& a_parent_patch ) + { + const auto first = thrust::make_zip_iterator( thrust::make_tuple( particle_container.getPtrCellKeys(), + static_cast( particle_container.getPtrPosition( 0 ) ) ) ); + const auto last = first + particle_container.deviceSize(); + int CellStartingGlobalIndex_for_x = a_parent_patch.getCellStartingGlobalIndex_noGC(0); + doComputeParticleClusterKey( first, last, + Cluster1D{ parameters.res_space[0], + parameters.patch_size_[0], + CellStartingGlobalIndex_for_x} ); + } + template void Cluster2D::computeParticleClusterKey( nvidiaParticles& particle_container, @@ -670,7 +547,7 @@ namespace detail { const auto last = first + particle_container.deviceSize(); int CellStartingGlobalIndex_for_x = a_parent_patch.getCellStartingGlobalIndex_noGC(0); int CellStartingGlobalIndex_for_y = a_parent_patch.getCellStartingGlobalIndex_noGC(1); - doComputeParticleClusterKey( first, last, + doComputeParticleClusterKey( first, last, Cluster2D{ parameters.res_space[0], parameters.res_space[1], parameters.patch_size_[0], @@ -693,7 +570,7 @@ namespace detail { int CellStartingGlobalIndex_for_x = a_parent_patch.getCellStartingGlobalIndex_noGC(0); int CellStartingGlobalIndex_for_y = a_parent_patch.getCellStartingGlobalIndex_noGC(1); int CellStartingGlobalIndex_for_z = a_parent_patch.getCellStartingGlobalIndex_noGC(2); - doComputeParticleClusterKey( first, last, + doComputeParticleClusterKey( first, last, Cluster3D{ parameters.res_space[0], parameters.res_space[1], parameters.res_space[2], @@ -705,277 +582,6 @@ namespace detail { CellStartingGlobalIndex_for_z } ); } - template - void - Cluster2D::sortParticleByKey( nvidiaParticles& particle_container, - const Params& ) - { - // This is where we do a runtime dispatch depending on the simulation's - // qed/radiation settings. - - // NOTE: For now we support dont support qed/radiations. Performance - // comes from specialization. - - // TODO(Etienne M): Find a better way to dispatch at runtime. This is - // complex to read and to maintain. 
- - if( particle_container.has_quantum_parameter ) { - if( particle_container.has_Monte_Carlo_process ) { - SMILEI_ASSERT( false ); - } else { - SMILEI_ASSERT( false ); - } - } else { - if( particle_container.has_Monte_Carlo_process ) { - SMILEI_ASSERT( false ); - } else { - // The appropriate thrust::zip_iterator for the current - // simulation's parameters - - const auto value_first = thrust::make_zip_iterator( thrust::make_tuple( particle_container.getPtrPosition( 0 ), - particle_container.getPtrPosition( 1 ), - particle_container.getPtrMomentum( 0 ), - particle_container.getPtrMomentum( 1 ), - particle_container.getPtrMomentum( 2 ), - particle_container.getPtrWeight(), - particle_container.getPtrCharge() ) ); - - doSortParticleByKey( particle_container.getPtrCellKeys(), - particle_container.getPtrCellKeys() + particle_container.deviceSize(), - value_first ); - } - } - } - - template - void - Cluster3D::sortParticleByKey( nvidiaParticles& particle_container, - const Params& ) - { - // This is where we do a runtime dispatch depending on the simulation's - // qed/radiation settings. - - // NOTE: For now we support dont support qed/radiations. Performance - // comes from specialization. - - // TODO(Etienne M): Find a better way to dispatch at runtime. This is - // complex to read and to maintain. - - if( particle_container.has_quantum_parameter ) { - if( particle_container.has_Monte_Carlo_process ) { - SMILEI_ASSERT( false ); - } else { - SMILEI_ASSERT( false ); - } - } else { - if( particle_container.has_Monte_Carlo_process ) { - SMILEI_ASSERT( false ); - } else { - // The appropriate thrust::zip_iterator for the current - // simulation's parameters - - if (particle_container.tracked) { - const auto value_first = thrust::make_zip_iterator( thrust::make_tuple( particle_container.getPtrPosition( 0 ), - particle_container.getPtrPosition( 1 ), - particle_container.getPtrPosition( 2 ), - particle_container.getPtrMomentum( 0 ), - particle_container.getPtrMomentum( 1 ), - particle_container.getPtrMomentum( 2 ), - particle_container.getPtrWeight(), - particle_container.getPtrCharge(), - particle_container.getPtrId() ) ); - doSortParticleByKey( particle_container.getPtrCellKeys(), - particle_container.getPtrCellKeys() + particle_container.deviceSize(), - value_first ); - - } - else { - const auto value_first = thrust::make_zip_iterator( thrust::make_tuple( particle_container.getPtrPosition( 0 ), - particle_container.getPtrPosition( 1 ), - particle_container.getPtrPosition( 2 ), - particle_container.getPtrMomentum( 0 ), - particle_container.getPtrMomentum( 1 ), - particle_container.getPtrMomentum( 2 ), - particle_container.getPtrWeight(), - particle_container.getPtrCharge() ) ); - doSortParticleByKey( particle_container.getPtrCellKeys(), - particle_container.getPtrCellKeys() + particle_container.deviceSize(), - value_first ); - } - } - } - } - - template - void - Cluster2D::importAndSortParticles( nvidiaParticles& particle_container, - nvidiaParticles& particle_to_inject, - const Params& parameters, - const Patch& a_parent_patch ) - { - // This is where we do a runtime dispatch depending on the simulation's - // qed/radiation settings. - - // NOTE: For now we support dont support qed/radiations. Performance - // comes from specialization. - - // TODO(Etienne M): Find a better way to dispatch at runtime. This is - // complex to read and to maintain. 
- int CellStartingGlobalIndex_for_x = a_parent_patch.getCellStartingGlobalIndex_noGC(0); - int CellStartingGlobalIndex_for_y = a_parent_patch.getCellStartingGlobalIndex_noGC(1); - - const Cluster2D cluster_manipulator{ parameters.res_space[0], - parameters.res_space[1], - parameters.patch_size_[0], - parameters.patch_size_[1], - CellStartingGlobalIndex_for_x, CellStartingGlobalIndex_for_y}; - - if( particle_container.has_quantum_parameter ) { - if( particle_container.has_Monte_Carlo_process ) { - SMILEI_ASSERT( false ); - } else { - SMILEI_ASSERT( false ); - } - } else { - if( particle_container.has_Monte_Carlo_process ) { - SMILEI_ASSERT( false ); - } else { - // Returns the appropriate thrust::zip_iterator for the - // current simulation's parameters - const auto particle_iterator_provider = []( nvidiaParticles& particle_container ) { - return thrust::make_zip_iterator( thrust::make_tuple( particle_container.getPtrCellKeys(), - particle_container.getPtrPosition( 0 ), - particle_container.getPtrPosition( 1 ), - particle_container.getPtrMomentum( 0 ), - particle_container.getPtrMomentum( 1 ), - particle_container.getPtrMomentum( 2 ), - particle_container.getPtrWeight(), - particle_container.getPtrCharge() ) ); - }; - - const auto particle_no_key_iterator_provider = []( nvidiaParticles& particle_container ) { - return thrust::make_zip_iterator( thrust::make_tuple( particle_container.getPtrPosition( 0 ), - particle_container.getPtrPosition( 1 ), - particle_container.getPtrMomentum( 0 ), - particle_container.getPtrMomentum( 1 ), - particle_container.getPtrMomentum( 2 ), - particle_container.getPtrWeight(), - particle_container.getPtrCharge() ) ); - }; - - doImportAndSortParticles( particle_container, - particle_to_inject, - cluster_manipulator, - particle_iterator_provider, - particle_no_key_iterator_provider ); - } - } - } - - template - void - Cluster3D::importAndSortParticles( nvidiaParticles& particle_container, - nvidiaParticles& particle_to_inject, - const Params& parameters, - const Patch& a_parent_patch ) - { - // This is where we do a runtime dispatch depending on the simulation's - // qed/radiation settings. - - // NOTE: For now we support dont support qed/radiations. Performance - // comes from specialization. - - // TODO(Etienne M): Find a better way to dispatch at runtime. This is - // complex to read and to maintain. 
- int CellStartingGlobalIndex_for_x = a_parent_patch.getCellStartingGlobalIndex_noGC(0); - int CellStartingGlobalIndex_for_y = a_parent_patch.getCellStartingGlobalIndex_noGC(1); - int CellStartingGlobalIndex_for_z = a_parent_patch.getCellStartingGlobalIndex_noGC(2); - - const Cluster3D cluster_manipulator{ parameters.res_space[0], - parameters.res_space[1], - parameters.res_space[2], - parameters.patch_size_[0], - parameters.patch_size_[1], - parameters.patch_size_[2], - CellStartingGlobalIndex_for_x, - CellStartingGlobalIndex_for_y, CellStartingGlobalIndex_for_z}; - - if( particle_container.has_quantum_parameter ) { - if( particle_container.has_Monte_Carlo_process ) { - SMILEI_ASSERT( false ); - } else { - SMILEI_ASSERT( false ); - } - } else { - if( particle_container.has_Monte_Carlo_process ) { - SMILEI_ASSERT( false ); - } else { - // Returns the appropriate thrust::zip_iterator for the - // current simulation's parameters - if (particle_container.tracked) { - const auto particle_iterator_provider = []( nvidiaParticles& particle_container ) { - return thrust::make_zip_iterator( thrust::make_tuple( particle_container.getPtrCellKeys(), - particle_container.getPtrPosition( 0 ), - particle_container.getPtrPosition( 1 ), - particle_container.getPtrPosition( 2 ), - particle_container.getPtrMomentum( 0 ), - particle_container.getPtrMomentum( 1 ), - particle_container.getPtrMomentum( 2 ), - particle_container.getPtrWeight(), - particle_container.getPtrCharge(), - particle_container.getPtrId() ) ); - }; - const auto particle_no_key_iterator_provider = []( nvidiaParticles& particle_container ) { - return thrust::make_zip_iterator( thrust::make_tuple( particle_container.getPtrPosition( 0 ), - particle_container.getPtrPosition( 1 ), - particle_container.getPtrPosition( 2 ), - particle_container.getPtrMomentum( 0 ), - particle_container.getPtrMomentum( 1 ), - particle_container.getPtrMomentum( 2 ), - particle_container.getPtrWeight(), - particle_container.getPtrCharge(), - particle_container.getPtrId() ) ); - }; - doImportAndSortParticles( particle_container, - particle_to_inject, - cluster_manipulator, - particle_iterator_provider, - particle_no_key_iterator_provider ); - } - else { - const auto particle_iterator_provider = []( nvidiaParticles& particle_container ) { - return thrust::make_zip_iterator( thrust::make_tuple( particle_container.getPtrCellKeys(), - particle_container.getPtrPosition( 0 ), - particle_container.getPtrPosition( 1 ), - particle_container.getPtrPosition( 2 ), - particle_container.getPtrMomentum( 0 ), - particle_container.getPtrMomentum( 1 ), - particle_container.getPtrMomentum( 2 ), - particle_container.getPtrWeight(), - particle_container.getPtrCharge() ) ); - }; - - const auto particle_no_key_iterator_provider = []( nvidiaParticles& particle_container ) { - return thrust::make_zip_iterator( thrust::make_tuple( particle_container.getPtrPosition( 0 ), - particle_container.getPtrPosition( 1 ), - particle_container.getPtrPosition( 2 ), - particle_container.getPtrMomentum( 0 ), - particle_container.getPtrMomentum( 1 ), - particle_container.getPtrMomentum( 2 ), - particle_container.getPtrWeight(), - particle_container.getPtrCharge() ) ); - }; - - doImportAndSortParticles( particle_container, - particle_to_inject, - cluster_manipulator, - particle_iterator_provider, - particle_no_key_iterator_provider ); - } - } - } - } - } // namespace detail @@ -990,7 +596,6 @@ nvidiaParticles::nvidiaParticles( const Params& parameters, , parent_patch_{ &a_parent_patch } , gpu_nparts_{} { - // 
EMPTY } nvidiaParticles::~nvidiaParticles() { @@ -1000,13 +605,7 @@ nvidiaParticles::~nvidiaParticles() { } } -void nvidiaParticles::resizeDimensions( unsigned int nDim ) -{ - nvidia_position_.resize( nDim ); - nvidia_momentum_.resize( 3 ); -} - -void nvidiaParticles::softReserve( unsigned int particle_count, float growth_factor ) +void nvidiaParticles::deviceReserve( unsigned int particle_count, float growth_factor ) { if( particle_count <= deviceCapacity() ) { // Dont reserve, for now we have enough capacity. @@ -1015,23 +614,12 @@ void nvidiaParticles::softReserve( unsigned int particle_count, float growth_fac const unsigned int new_capacity = static_cast( particle_count * growth_factor ); - for( unsigned int idim = 0; idim < nvidia_position_.size(); idim++ ) { - nvidia_position_[idim].reserve( new_capacity ); - } - - for( unsigned int idim = 0; idim < 3; idim++ ) { - nvidia_momentum_[idim].reserve( new_capacity ); - } - - nvidia_weight_.reserve( new_capacity ); - nvidia_charge_.reserve( new_capacity ); - - if( has_quantum_parameter ) { - nvidia_chi_.reserve( new_capacity ); + for( auto prop: nvidia_double_prop_ ) { + prop->reserve( new_capacity ); } - if( has_Monte_Carlo_process ) { - nvidia_tau_.reserve( new_capacity ); + for( auto prop: nvidia_short_prop_ ) { + prop->reserve( new_capacity ); } if( tracked ) { @@ -1041,137 +629,41 @@ void nvidiaParticles::softReserve( unsigned int particle_count, float growth_fac nvidia_cell_keys_.reserve( new_capacity ); } -void nvidiaParticles::reserve( unsigned int particle_count ) -{ - for( unsigned int idim = 0; idim < nvidia_position_.size(); idim++ ) { - nvidia_position_[idim].reserve( particle_count ); - } - - for( unsigned int idim = 0; idim < 3; idim++ ) { - nvidia_momentum_[idim].reserve( particle_count ); - } - - nvidia_weight_.reserve( particle_count ); - nvidia_charge_.reserve( particle_count ); - - if( has_quantum_parameter ) { - nvidia_chi_.reserve( particle_count ); - } - - if( has_Monte_Carlo_process ) { - nvidia_tau_.reserve( particle_count ); - } - - if( tracked ) { - nvidia_id_.reserve( particle_count ); - } - - nvidia_cell_keys_.reserve( particle_count ); -} - -void nvidiaParticles::resize( unsigned int particle_count ) -{ - - // TODO(Etienne M): Use non-initializing vector/allocator (dont pay the cost - // of what you dont use) ? 
- - for( int idim = 0; idim < nvidia_position_.size(); idim++ ) { - nvidia_position_[idim].resize( particle_count ); - } - - for( int idim = 0; idim < 3; idim++ ) { - nvidia_momentum_[idim].resize( particle_count ); - } - - nvidia_weight_.resize( particle_count ); - nvidia_charge_.resize( particle_count ); - - if( has_quantum_parameter ) { - nvidia_chi_.resize( particle_count ); - } - - if( has_Monte_Carlo_process ) { - nvidia_tau_.resize( particle_count ); - } - - if( tracked ) { - nvidia_id_.resize( particle_count ); - } - - nvidia_cell_keys_.resize( particle_count ); - - gpu_nparts_ = particle_count; -} - -void nvidiaParticles::free() +void nvidiaParticles::deviceFree() { - for( auto& a_vector : nvidia_position_ ) { - thrust::device_vector a_dummy_vector{}; - std::swap( a_vector, a_dummy_vector ); - } - - for( auto& a_vector : nvidia_momentum_ ) { - thrust::device_vector a_dummy_vector{}; - std::swap( a_vector, a_dummy_vector ); + for( auto prop: nvidia_double_prop_ ) { + thrust::device_vector().swap( *prop ); } - { - thrust::device_vector a_dummy_vector{}; - std::swap( nvidia_weight_, a_dummy_vector ); - } - - { - thrust::device_vector a_dummy_vector{}; - std::swap( nvidia_charge_, a_dummy_vector ); - } - - if( has_quantum_parameter ) { - thrust::device_vector a_dummy_vector{}; - std::swap( nvidia_chi_, a_dummy_vector ); - } - - if( has_Monte_Carlo_process ) { - thrust::device_vector a_dummy_vector{}; - std::swap( nvidia_tau_, a_dummy_vector ); + for( auto prop: nvidia_short_prop_ ) { + thrust::device_vector().swap( *prop ); } if( tracked ) { - thrust::device_vector a_dummy_vector{}; - std::swap( nvidia_id_, a_dummy_vector ); + thrust::device_vector().swap( nvidia_id_ ); } - { - thrust::device_vector a_dummy_vector{}; - std::swap( nvidia_cell_keys_, a_dummy_vector ); - } + thrust::device_vector().swap( nvidia_cell_keys_ ); gpu_nparts_ = 0; } -// --------------------------------------------------------------------------------------------------------------------- -//! Resize particle vectors -// --------------------------------------------------------------------------------------------------------------------- void nvidiaParticles::deviceResize( unsigned int new_size ) { - for( unsigned int iprop=0 ; ipropresize( new_size ); } - for( unsigned int iprop=0 ; ipropresize( new_size ); } - // - // for( unsigned int iprop=0 ; ipropclear(); + for( auto prop: nvidia_double_prop_ ) { + prop->clear(); } - for( unsigned int iprop = 0; iprop < nvidia_short_prop_.size(); iprop++ ) { - nvidia_short_prop_[iprop]->clear(); + for( auto prop: nvidia_short_prop_ ) { + prop->clear(); } // TODO(Etienne M): Clear cell keys too ? - if (tracked) { + if( tracked ) { nvidia_id_.clear(); } - + gpu_nparts_ = 0; } @@ -1215,23 +707,18 @@ void nvidiaParticles::initializeDataOnDevice() // The world shall end if we call this function multiple times SMILEI_ASSERT( nvidia_double_prop_.empty() ); - const auto kPositionDimension = Position.size(); - // We sure that we have as many say, position dimension as the base class. 
- resizeDimensions( kPositionDimension ); + nvidia_position_.resize( Position.size() ); + nvidia_momentum_.resize( 3 ); // Initialize the list of pointers - - for( unsigned int i = 0; i < kPositionDimension; i++ ) { - nvidia_double_prop_.push_back( &nvidia_position_[i] ); + for( auto &pos: nvidia_position_ ) { + nvidia_double_prop_.push_back( &pos ); } - - for( unsigned int i = 0; i < 3; i++ ) { - nvidia_double_prop_.push_back( &nvidia_momentum_[i] ); + for( auto &mom: nvidia_momentum_ ) { + nvidia_double_prop_.push_back( &mom ); } - nvidia_double_prop_.push_back( &nvidia_weight_ ); - nvidia_short_prop_.push_back( &nvidia_charge_ ); // Quantum parameter (for QED effects): @@ -1248,9 +735,9 @@ void nvidiaParticles::initializeDataOnDevice() nvidia_double_prop_.push_back( &nvidia_tau_ ); } - const auto kHostParticleCount = Position[0].size(); + const auto hostParticleCount = Position[0].size(); - if( kHostParticleCount == 0 ) { + if( hostParticleCount == 0 ) { // Should we reserve some space ? // reserve( 100 ); } else { @@ -1271,14 +758,13 @@ void nvidiaParticles::initializeDataOnDevice() // setHostBinIndex(); } else { - + // At this point, a copy of the host particles and last_index is on the // device and we know we support the space dimension. - detail::Cluster::computeParticleClusterKey( *this, *parameters_, *parent_patch_ ); // The particles are not correctly sorted when created. - detail::Cluster::sortParticleByKey( *this, *parameters_ ); + sortParticleByKey(); detail::Cluster::computeBinIndex( *this ); setHostBinIndex(); @@ -1299,7 +785,7 @@ void nvidiaParticles::initializeIDsOnDevice() // ------------------------------------------------------------------------------------------------- void nvidiaParticles::copyFromHostToDevice() { - resize( Position[0].size() ); + deviceResize( Position[0].size() ); for( int idim = 0; idim < Position.size(); idim++ ) { thrust::copy( Position[idim].begin(), Position[idim].end(), nvidia_position_[idim].begin() ); @@ -1308,7 +794,6 @@ void nvidiaParticles::copyFromHostToDevice() for( int idim = 0; idim < Momentum.size(); idim++ ) { thrust::copy( Momentum[idim].begin(), Momentum[idim].end(), nvidia_momentum_[idim].begin() ); } - thrust::copy( Weight.begin(), Weight.end(), nvidia_weight_.begin() ); thrust::copy( Charge.begin(), Charge.end(), nvidia_charge_.begin() ); @@ -1329,7 +814,7 @@ void nvidiaParticles::copyFromHostToDevice() // ------------------------------------------------------------------------------------------------- //! 
Copy device to host // ------------------------------------------------------------------------------------------------- -void nvidiaParticles::copyFromDeviceToHost() +void nvidiaParticles::copyFromDeviceToHost( bool copy_keys ) { for (int idim=0;idim( particles_to_move ); - const int nparts = gpu_nparts_; - const int position_dimension_count = nvidia_position_.size(); - - const int nparts_to_move = thrust::count_if( thrust::device, - nvidia_cell_keys_.cbegin(), - nvidia_cell_keys_.cbegin() + nparts, - count_if_out() ); - - // Resize it, if too small (copy_if do not resize) - cp_parts->resize( nparts_to_move ); - - // Iterator of the main data structure - // NOTE: https://nvidia.github.io/thrust/api/classes/classthrust_1_1zip__iterator.html#class-thrustzip_iterator - const auto source_iterator_first = thrust::make_zip_iterator( thrust::make_tuple( nvidia_position_[0].begin(), - nvidia_momentum_[0].begin(), - nvidia_momentum_[1].begin(), - nvidia_momentum_[2].begin(), - nvidia_weight_.begin(), - nvidia_charge_.begin() ) ); - const auto source_iterator_last = source_iterator_first + nparts; // std::advance - const auto destination_iterator_first = thrust::make_zip_iterator( thrust::make_tuple( cp_parts->nvidia_position_[0].begin(), - cp_parts->nvidia_momentum_[0].begin(), - cp_parts->nvidia_momentum_[1].begin(), - cp_parts->nvidia_momentum_[2].begin(), - cp_parts->nvidia_weight_.begin(), - cp_parts->nvidia_charge_.begin() ) ); - - // Copy send particles in dedicated data structure if nvidia_cell_keys_=0 (currently = 1 if keeped, new PartBoundCond::apply(...)) - thrust::copy_if( thrust::device, - source_iterator_first, - source_iterator_last, - // Copy depending on count_if_out()(nvidia_cell_keys_[i]) - nvidia_cell_keys_.cbegin(), - destination_iterator_first, - count_if_out() ); - - // Copy the other position values depending on the simulation's grid - // dimensions - for( int i = 1; i < position_dimension_count; ++i ) { - thrust::copy_if( thrust::device, - nvidia_position_[i].cbegin(), - nvidia_position_[i].cbegin() + nparts, - nvidia_cell_keys_.cbegin(), - cp_parts->nvidia_position_[i].begin(), - count_if_out() ); - } + copyParticlesByPredicate( buffer, cellKeyBelow<-1>() ); + buffer->copyFromDeviceToHost( true ); +} - // Special treatment for chi if radiation emission - if( has_quantum_parameter ) { - thrust::copy_if( thrust::device, - nvidia_chi_.cbegin(), - nvidia_chi_.cbegin() + nparts, - nvidia_cell_keys_.cbegin(), - cp_parts->nvidia_chi_.begin(), - count_if_out() ); - } - if( has_Monte_Carlo_process ) { - thrust::copy_if( thrust::device, - nvidia_tau_.cbegin(), - nvidia_tau_.cbegin() + nparts, - nvidia_cell_keys_.cbegin(), - cp_parts->nvidia_tau_.begin(), - count_if_out() ); +//! 
Copy particles which satisfy some predicate +template <typename Predicate> +void nvidiaParticles::copyParticlesByPredicate( Particles* buffer, Predicate pred ) +{ +    // Count particles satisfying the predicate +    const auto keys = getPtrCellKeys(); +    const int nparts_to_copy = thrust::count_if( thrust::device, keys, keys + gpu_nparts_, pred ); + +    // Resize destination buffer (copy_if does not resize) +    nvidiaParticles* const dest = static_cast<nvidiaParticles*>( buffer ); +    dest->deviceResize( nparts_to_copy ); + +    if( nparts_to_copy ) { +        // Copy the particles to the destination +        for( int ip = 0; ip < nvidia_double_prop_.size(); ip++ ) { +            const auto in = nvidia_double_prop_[ip]->begin(); +            const auto out = dest->nvidia_double_prop_[ip]->begin(); +            thrust::copy_if( SMILEI_ACCELERATOR_ASYNC_POLYCY, in, in + gpu_nparts_, keys, out, pred ); +        } +        for( int ip = 0; ip < nvidia_short_prop_.size(); ip++ ) { +            const auto in = nvidia_short_prop_[ip]->begin(); +            const auto out = dest->nvidia_short_prop_[ip]->begin(); +            thrust::copy_if( SMILEI_ACCELERATOR_ASYNC_POLYCY, in, in + gpu_nparts_, keys, out, pred ); +        } +        if( tracked ) { +            const auto in = nvidia_id_.begin(); +            const auto out = dest->nvidia_id_.begin(); +            thrust::copy_if( SMILEI_ACCELERATOR_ASYNC_POLYCY, in, in + gpu_nparts_, keys, out, pred ); +        } +        const auto in = nvidia_cell_keys_.begin(); +        const auto out = dest->nvidia_cell_keys_.begin(); +        thrust::copy_if( SMILEI_ACCELERATOR_ASYNC_POLYCY, in, in + gpu_nparts_, keys, out, pred ); +        SMILEI_ACCELERATOR_DEVICE_SYNC(); +    } +} +int nvidiaParticles::addParticles( Particles* particles_to_inject ) +{ +    const auto nparts = gpu_nparts_; +    nvidiaParticles* to_inject = static_cast<nvidiaParticles*>( particles_to_inject ); +    deviceResize( nparts + to_inject->gpu_nparts_ ); +    pasteParticles( to_inject, nparts, 0 ); +    return to_inject->gpu_nparts_; +} + +void nvidiaParticles::pasteParticles( nvidiaParticles* particles_to_inject, size_t offset_in_output, size_t offset_in_input ) +{ +    const auto n = particles_to_inject->gpu_nparts_ - (int) offset_in_input; + +    // Copy the particles to the destination +    for( int ip = 0; ip < nvidia_double_prop_.size(); ip++ ) { +        const auto in = particles_to_inject->nvidia_double_prop_[ip]->begin() + offset_in_input; +        const auto out = nvidia_double_prop_[ip]->begin() + offset_in_output; +        thrust::copy_n( SMILEI_ACCELERATOR_ASYNC_POLYCY, in, n, out ); +    } +    for( int ip = 0; ip < nvidia_short_prop_.size(); ip++ ) { +        const auto in = particles_to_inject->nvidia_short_prop_[ip]->begin() + offset_in_input; +        const auto out = nvidia_short_prop_[ip]->begin() + offset_in_output; +        thrust::copy_n( SMILEI_ACCELERATOR_ASYNC_POLYCY, in, n, out ); +    } if( tracked ) { -        thrust::copy_if( thrust::device, -                         nvidia_id_.cbegin(), -                         nvidia_id_.cbegin() + nparts, -                         nvidia_cell_keys_.cbegin(), -                         cp_parts->nvidia_id_.begin(), -                         count_if_out() ); +        const auto in = particles_to_inject->nvidia_id_.begin() + offset_in_input; +        const auto out = nvidia_id_.begin() + offset_in_output; +        thrust::copy_n( SMILEI_ACCELERATOR_ASYNC_POLYCY, in, n, out ); } - -    particles_to_move->copyFromDeviceToHost(); +    SMILEI_ACCELERATOR_DEVICE_SYNC(); } - // ----------------------------------------------------------------------------- //!
Erase `npart` particles from `ipart` // ----------------------------------------------------------------------------- @@ -1475,158 +950,49 @@ void nvidiaParticles::extractParticles( Particles* particles_to_move ) // std::begin( nvidia_position_[i] ), // std::begin( nvidia_position_[i] ) + nparts, // std::cbegin( nvidia_cell_keys_ ), -// count_if_out() ); +// cellKeyEquals<-1>() ); // } // //} // ----------------------------------------------------------------------------- -//! Erase particles leaving the patch object on device +//! Erase particles leaving the patch on device // ----------------------------------------------------------------------------- int nvidiaParticles::eraseLeavingParticles() { - const int position_dimension_count = nvidia_position_.size(); - const int nparts = gpu_nparts_; - const int nparts_to_remove = thrust::count_if( thrust::device, - nvidia_cell_keys_.begin(), - nvidia_cell_keys_.begin() + nparts, - count_if_out() ); - - - if( nparts_to_remove > 0 ) { - const auto first_particle = thrust::make_zip_iterator( thrust::make_tuple( nvidia_position_[0].begin(), - nvidia_momentum_[0].begin(), - nvidia_momentum_[1].begin(), - nvidia_momentum_[2].begin(), - nvidia_weight_.begin(), - nvidia_charge_.begin() ) ); - - const auto last_particle = first_particle + nparts; - - // Remove particles which leaves current patch - thrust::remove_if( thrust::device, - first_particle, - last_particle, - nvidia_cell_keys_.cbegin(), - count_if_out() ); - - // Remove the other position values depending on the simulation's grid - // dimensions - for( int i = 1; i < position_dimension_count; ++i ) { - thrust::remove_if( thrust::device, - nvidia_position_[i].begin(), - nvidia_position_[i].begin() + nparts, - nvidia_cell_keys_.cbegin(), - count_if_out() ); - } - - if( has_quantum_parameter ) { - thrust::remove_if( thrust::device, - nvidia_chi_.begin(), - nvidia_chi_.begin() + nparts, - nvidia_cell_keys_.cbegin(), - count_if_out() ); - } - - if( has_Monte_Carlo_process ) { - thrust::remove_if( thrust::device, - nvidia_tau_.begin(), - nvidia_tau_.begin() + nparts, - nvidia_cell_keys_.cbegin(), - count_if_out() ); - } - - if( tracked ) { - thrust::remove_if( thrust::device, - nvidia_id_.begin(), - nvidia_id_.begin() + nparts, - nvidia_cell_keys_.cbegin(), - count_if_out() ); - } - - // Update current number of particles - gpu_nparts_ -= nparts_to_remove; - - // Resize data structures (remove_if does not resize) - resize( gpu_nparts_ ); - } - - return nparts_to_remove; + const auto nremoved = eraseParticlesByPredicate( cellKeyBelow<0>(), 0 ); + deviceResize( gpu_nparts_ - nremoved ); + return nremoved; } -int nvidiaParticles::injectParticles( Particles* particles_to_inject ) +//! "Erase" particles but does not resize the arrays! 
+template +int nvidiaParticles::eraseParticlesByPredicate( Predicate pred, size_t offset ) { - const int nparts = gpu_nparts_; - - // Manage the recv data structure - nvidiaParticles* const cp_parts = static_cast( particles_to_inject ); - - const int nparts_add = cp_parts->gpu_nparts_; - const int tot_parts = nparts + nparts_add; - - const int position_dimension_count = nvidia_position_.size(); - - // Resize main data structure, if too small (copy_n do not resize) - resize( tot_parts ); - - const auto source_iterator_first = thrust::make_zip_iterator( thrust::make_tuple( cp_parts->nvidia_position_[0].cbegin(), - cp_parts->nvidia_momentum_[0].cbegin(), - cp_parts->nvidia_momentum_[1].cbegin(), - cp_parts->nvidia_momentum_[2].cbegin(), - cp_parts->nvidia_weight_.cbegin(), - cp_parts->nvidia_charge_.cbegin() ) ); - - // Iterator of the main data structure (once it has been resized) - const auto destination_iterator_first = thrust::make_zip_iterator( thrust::make_tuple( nvidia_position_[0].begin(), - nvidia_momentum_[0].begin(), - nvidia_momentum_[1].begin(), - nvidia_momentum_[2].begin(), - nvidia_weight_.begin(), - nvidia_charge_.begin() ) ) + - nparts; - - // Copy recv particles in main data structure - thrust::copy_n( thrust::device, - source_iterator_first, - nparts_add, - destination_iterator_first ); - - // Remove the other position values depending on the simulation's grid - // dimensions - for( int i = 1; i < position_dimension_count; ++i ) { - thrust::copy_n( thrust::device, - cp_parts->nvidia_position_[i].cbegin(), - nparts_add, - nvidia_position_[i].begin() + nparts ); + const auto keys = getPtrCellKeys(); + const int nparts_to_remove = thrust::count_if( thrust::device, keys + offset, keys + gpu_nparts_, pred ); + + // Copy the particles to the destination + // Using more memory, we could use the faster remove_copy_if + // NOTE: remove_if is stable. + for( auto prop: nvidia_double_prop_ ) { + const auto in = prop->begin(); + thrust::remove_if( SMILEI_ACCELERATOR_ASYNC_POLYCY, in + offset, in + gpu_nparts_, keys + offset, pred ); } - - if( has_quantum_parameter ) { - thrust::copy_n( thrust::device, - cp_parts->nvidia_chi_.cbegin(), - nparts_add, - nvidia_chi_.begin() + nparts ); + for( auto prop: nvidia_short_prop_ ) { + const auto in = prop->begin(); + thrust::remove_if( SMILEI_ACCELERATOR_ASYNC_POLYCY, in + offset, in + gpu_nparts_, keys + offset, pred ); } - - if( has_Monte_Carlo_process ) { - thrust::copy_n( thrust::device, - cp_parts->nvidia_tau_.cbegin(), - nparts_add, - nvidia_tau_.begin() + nparts ); - } - if( tracked ) { - thrust::copy_n( thrust::device, - cp_parts->nvidia_id_.cbegin(), - nparts_add, - nvidia_id_.begin() + nparts ); + const auto in = nvidia_id_.begin(); + thrust::remove_if( SMILEI_ACCELERATOR_ASYNC_POLYCY, in + offset, in + gpu_nparts_, keys + offset, pred ); } + SMILEI_ACCELERATOR_DEVICE_SYNC(); - // No more particles to move - cp_parts->resize( 0 ); - - return nparts_add; + return nparts_to_remove; } + // --------------------------------------------------------------------------------------------------------------------- //! Create n_additional_particles new particles at the end of vectors //! 
Fill the new elements with 0 @@ -1635,29 +1001,22 @@ void nvidiaParticles::createParticles( int n_additional_particles ) { int n_particles = gpu_nparts_; int new_size = n_particles + n_additional_particles; - for( unsigned int iprop=0 ; ipropbegin() + n_particles, prop->begin() + new_size, 0); } - - for( unsigned int iprop=0 ; ipropbegin() + n_particles, prop->begin() + new_size, 0); } - - // for( unsigned int iprop=0 ; iprop index( gpu_nparts_ ); + thrust::sequence( thrust::device, index.begin(), index.end() ); + thrust::sort_by_key( thrust::device, nvidia_cell_keys_.begin(), nvidia_cell_keys_.end(), index.begin() ); + + // Sort particles using thrust::gather, according to the sorting map + thrust::device_vector buffer( gpu_nparts_ ); + for( auto prop: nvidia_double_prop_ ) { + thrust::gather( thrust::device, index.begin(), index.end(), prop->begin(), buffer.begin() ); + prop->swap( buffer ); + } + buffer.clear(); + thrust::device_vector buffer_short( gpu_nparts_ ); + for( auto prop: nvidia_short_prop_ ) { + thrust::gather( thrust::device, index.begin(), index.end(), prop->begin(), buffer_short.begin() ); + prop->swap( buffer_short ); + } + buffer_short.clear(); + if( tracked ) { + thrust::device_vector buffer_uint64( gpu_nparts_ ); + thrust::gather( thrust::device, index.begin(), index.end(), nvidia_id_.begin(), buffer_uint64.begin() ); + nvidia_id_.swap( buffer_uint64 ); + buffer_uint64.clear(); + } +} + +//! Sort by cell_keys_ +//! This version is asynchronous, but requires a buffer of equal size to be provided +void nvidiaParticles::sortParticleByKey( nvidiaParticles &buffer ) +{ + // Make a sorting map using the cell keys (like numpy.argsort) + thrust::device_vector index( gpu_nparts_ ); + thrust::sequence( thrust::device, index.begin(), index.end() ); + thrust::sort_by_key( thrust::device, nvidia_cell_keys_.begin(), nvidia_cell_keys_.end(), index.begin() ); + + // Sort particles using thrust::gather, according to the sorting map + for( int ip = 0; ip < nvidia_double_prop_.size(); ip++ ) { + thrust::gather( SMILEI_ACCELERATOR_ASYNC_POLYCY, index.begin(), index.end(), nvidia_double_prop_[ip]->begin(), buffer.nvidia_double_prop_[ip]->begin() ); + } + for( int ip = 0; ip < nvidia_short_prop_.size(); ip++ ) { + thrust::gather( SMILEI_ACCELERATOR_ASYNC_POLYCY, index.begin(), index.end(), nvidia_short_prop_[ip]->begin(), buffer.nvidia_short_prop_[ip]->begin() ); + } + if( tracked ) { + thrust::gather( SMILEI_ACCELERATOR_ASYNC_POLYCY, index.begin(), index.end(), nvidia_id_.begin(), buffer.nvidia_id_.begin() ); + } + SMILEI_ACCELERATOR_DEVICE_SYNC(); + + // Swap properties with their buffer + for( int iprop = 0; iprop < nvidia_double_prop_.size(); iprop++ ) { + nvidia_double_prop_[iprop]->swap( *buffer.nvidia_double_prop_[iprop] ); + } + for( int iprop = 0; iprop < nvidia_short_prop_.size(); iprop++ ) { + nvidia_short_prop_[iprop]->swap( *buffer.nvidia_short_prop_[iprop] ); + } + if( tracked ) { + nvidia_id_.swap( buffer.nvidia_id_ ); + } +} + + +void nvidiaParticles::scatterParticles( nvidiaParticles &dest, const thrust::device_vector &index ) +{ + const auto n = std::min( (int) index.size(), gpu_nparts_ ); + for( int ip = 0; ip < nvidia_double_prop_.size(); ip++ ) { + const auto in = nvidia_double_prop_[ip]->begin(); + thrust::scatter( SMILEI_ACCELERATOR_ASYNC_POLYCY, in, in + n, index.begin(), dest.nvidia_double_prop_[ip]->begin() ); + } + for( int ip = 0; ip < nvidia_short_prop_.size(); ip++ ) { + const auto in = nvidia_short_prop_[ip]->begin(); + thrust::scatter( 
SMILEI_ACCELERATOR_ASYNC_POLYCY, in, in + n, index.begin(), dest.nvidia_short_prop_[ip]->begin() ); + } + if( tracked ) { + const auto in = nvidia_id_.begin(); + thrust::scatter( SMILEI_ACCELERATOR_ASYNC_POLYCY, in, in + n, index.begin(), dest.nvidia_id_.begin() ); + } + SMILEI_ACCELERATOR_DEVICE_SYNC(); +} + int nvidiaParticles::prepareBinIndex() { if( first_index.size() == 0 ) { - // Some Particles object like particles_to_move do not have allocated - // bins, we skip theses. + // Some Particles object do not have allocated bins, we skip theses. return -1; } @@ -1740,7 +1180,10 @@ void nvidiaParticles::naiveImportAndSortParticles( nvidiaParticles* particles_to eraseLeavingParticles(); // Inject newly arrived particles in particles_to_inject - injectParticles( particles_to_inject ); + const size_t current_size = gpu_nparts_; + deviceResize( current_size + particles_to_inject->size() ); + pasteParticles( particles_to_inject, current_size, 0 ); + particles_to_inject->clear(); } extern "C" diff --git a/src/Particles/nvidiaParticles.h b/src/Particles/nvidiaParticles.h index 249a9fcf2..a02edffc8 100644 --- a/src/Particles/nvidiaParticles.h +++ b/src/Particles/nvidiaParticles.h @@ -34,33 +34,20 @@ class nvidiaParticles : public Particles //! Destructor for nvidiaParticles ~nvidiaParticles(); - //! Allocate the right amount of position and momentum dimensions - void resizeDimensions( unsigned int nDim ); - //! Reserve space for (particle_count * growth_factor) particles only if //! particle_count >= deviceCapacity(). Must be called after //! allocateDimensions() - void softReserve( unsigned int particle_count, float growth_factor = 1.3F ); - - //! Reserve space for particle_count particles. Must be called after - //! allocateDimensions() - void reserve( unsigned int particle_count ); - - //! Allocate particle_count particles. Must be called after - //! allocateDimensions() - //! Set the size (deviceSize) of nvidiaParticles to particle_count. - //! - void resize( unsigned int particle_count ); + void deviceReserve( unsigned int particle_count, float growth_factor = 1.3F ); //! Assures that the memory holden by the nvidia_[position|momentum|weight| //! charge|chi|tau|cell_keys]_ is freed. This is not something you can //! achieve via a naive resize. //! The pointers in nvidia_[double|short]_prop_ are not invalidated. //! - void free(); + void deviceFree(); //! Resize Particle vectors on device - void deviceResize(unsigned int new_size); + void deviceResize( unsigned int new_size ); //! Remove all particles void deviceClear(); @@ -78,7 +65,7 @@ class nvidiaParticles : public Particles void copyFromHostToDevice() override; //! Update the particles from device to host - void copyFromDeviceToHost() override; + void copyFromDeviceToHost( bool copy_keys = false ) override; unsigned int deviceCapacity() const override; @@ -113,21 +100,27 @@ class nvidiaParticles : public Particles }; // ----------------------------------------------------------------------------- - //! Extract particles from the Particles object and put - //! them in the Particles object `particles_to_move` + //! Move leaving particles to the buffers // ----------------------------------------------------------------------------- - void extractParticles( Particles* particles_to_move ) override; + void copyLeavingParticlesToBuffer( Particles* buffer ) override; + + template + void copyParticlesByPredicate( Particles* buffer, Predicate pred ); + + //! 
Resize & Copy particles from particles_to_inject to end of vectors + int addParticles( Particles* particles_to_inject ) override; + + //! Copy particles from particles_to_inject to specific offset + void pasteParticles( nvidiaParticles* particles_to_inject, size_t offset_out, size_t offset_in ); // ----------------------------------------------------------------------------- //! Erase particles leaving the patch object on device and returns the number of particle removed // ----------------------------------------------------------------------------- int eraseLeavingParticles() override; - // ----------------------------------------------------------------------------- - //! Inject particles from particles_to_move into *this and return he number of particle added - // ----------------------------------------------------------------------------- - int injectParticles( Particles* particles_to_inject ) override; - + template + int eraseParticlesByPredicate( Predicate pred, size_t offset ); + // --------------------------------------------------------------------------------------------------------------------- //! Create n_additional_particles new particles at the end of vectors //! Fill the new elements with 0 @@ -137,6 +130,14 @@ class nvidiaParticles : public Particles //! See the Particles class for documentation. void importAndSortParticles( Particles* particles_to_inject ) override; + //! Sort by cell_keys_ + //! This version synchronizes for every vector, but uses less buffers + void sortParticleByKey(); + //! This version is asynchronous, but requires a buffer of equal size to be provided + void sortParticleByKey( nvidiaParticles& buffer ); + + void scatterParticles( nvidiaParticles &particles_to_import, const thrust::device_vector &index ); + protected: //! Redefine first_index and last_index according to the binning algorithm //! used on GPU. 
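Note on the pattern introduced in nvidiaParticles above: copyParticlesByPredicate, eraseParticlesByPredicate and sortParticleByKey all apply the same cell-key stencil (or sorting map) to every per-particle property vector, so the structure-of-arrays layout stays aligned after a selective copy, removal or sort. The following is a minimal, self-contained Thrust sketch of the stencil-driven copy only; it is illustrative, not Smilei code: the key_below functor, the property names and the copy_selected helper are assumptions made for the example, whereas the real implementation loops over nvidia_double_prop_ / nvidia_short_prop_ and uses cellKeyBelow<N>() with the SMILEI_ACCELERATOR_ASYNC_POLYCY execution policy.

#include <thrust/device_vector.h>
#include <thrust/count.h>
#include <thrust/copy.h>
#include <thrust/execution_policy.h>

// Predicate on the per-particle cell key (hypothetical stand-in for cellKeyBelow<N>())
struct key_below {
    int threshold;
    __host__ __device__ bool operator()( int key ) const { return key < threshold; }
};

// Copy every particle whose cell key is < threshold into the destination vectors,
// using the cell keys as a common stencil so all properties stay aligned.
void copy_selected( const thrust::device_vector<int>    &keys,
                    const thrust::device_vector<double> &weight,
                    const thrust::device_vector<short>  &charge,
                    thrust::device_vector<double>       &dst_weight,
                    thrust::device_vector<short>        &dst_charge,
                    int threshold )
{
    const key_below pred{ threshold };

    // Count the selected particles, then resize the destination
    // (thrust::copy_if does not resize its output).
    const int n_out = thrust::count_if( thrust::device, keys.begin(), keys.end(), pred );
    dst_weight.resize( n_out );
    dst_charge.resize( n_out );

    // The same stencil drives the copy of every property, so the i-th selected
    // weight still belongs to the same particle as the i-th selected charge.
    thrust::copy_if( thrust::device, weight.begin(), weight.end(), keys.begin(), dst_weight.begin(), pred );
    thrust::copy_if( thrust::device, charge.begin(), charge.end(), keys.begin(), dst_charge.begin(), pred );
}

The same stencil idea carries over to removal (thrust::remove_if with the keys as stencil, as in eraseParticlesByPredicate) and to sorting (argsort the keys with thrust::sort_by_key on an index vector, then thrust::gather each property, as in sortParticleByKey).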
diff --git a/src/Patch/Patch.cpp b/src/Patch/Patch.cpp index b8ed401d9..ca76c6ece 100755 --- a/src/Patch/Patch.cpp +++ b/src/Patch/Patch.cpp @@ -445,7 +445,7 @@ void Patch::setLocationAndAllocateFields( Params ¶ms, DomainDecomposition *d Patch::~Patch() { -#ifdef SMILEI_ACCELERATOR_MODE +#ifdef SMILEI_ACCELERATOR_GPU deleteFieldsOnDevice(); #endif @@ -517,220 +517,155 @@ void Patch::updateMPIenv( SmileiMPI *smpi ) // --------------------------------------------------------------------------------------------------------------------- void Patch::cleanMPIBuffers( int ispec, Params ¶ms ) { - int ndim = params.nDim_field; + size_t ndim = params.nDim_field; + SpeciesMPIbuffers &buffer = vecSpecies[ispec]->MPI_buffer_; - for( int iDim=0 ; iDim < ndim ; iDim++ ) { + for( size_t iDim=0 ; iDim < ndim ; iDim++ ) { for( int iNeighbor=0 ; iNeighborMPI_buffer_.partRecv[iDim][iNeighbor].clear();//resize(0,ndim); - vecSpecies[ispec]->MPI_buffer_.partSend[iDim][iNeighbor].clear();//resize(0,ndim); - vecSpecies[ispec]->MPI_buffer_.part_index_send[iDim][iNeighbor].clear(); - //vecSpecies[ispec]->MPI_buffer_.part_index_send[iDim][iNeighbor].resize(0); - vecSpecies[ispec]->MPI_buffer_.part_index_recv_sz[iDim][iNeighbor] = 0; + buffer.partRecv[iDim][iNeighbor]->clear(); + buffer.partSend[iDim][iNeighbor]->clear(); } } } // cleanMPIBuffers // --------------------------------------------------------------------------------------------------------------------- -// Split particles Id to send in per direction and per patch neighbor dedicated buffers -// Apply periodicity if necessary +// Copy particles to be exchanged to buffers // --------------------------------------------------------------------------------------------------------------------- -void Patch::initExchParticles( int ispec, Params ¶ms ) +void Patch::copyExchParticlesToBuffers( int ispec, Params ¶ms ) { - Particles &cuParticles = ( *vecSpecies[ispec]->particles_to_move ); - int ndim = params.nDim_field; - int idim, check; -// double xmax[3]; - - for( int iDim=0 ; iDim < ndim ; iDim++ ) { - for( int iNeighbor=0 ; iNeighborMPI_buffer_.partRecv[iDim][iNeighbor].clear();//resize(0,ndim); - vecSpecies[ispec]->MPI_buffer_.partSend[iDim][iNeighbor].clear();//resize(0,ndim); - vecSpecies[ispec]->MPI_buffer_.part_index_send[iDim][iNeighbor].resize( 0 ); - vecSpecies[ispec]->MPI_buffer_.part_index_recv_sz[iDim][iNeighbor] = 0; - } + SpeciesMPIbuffers &buffer = vecSpecies[ispec]->MPI_buffer_; + Particles &part = *vecSpecies[ispec]->particles; + + cleanMPIBuffers( ispec, params ); + + // Make a list of buffers + vector copy( params.nDim_field*2, false ); + vector sendBuffer( params.nDim_field*2, nullptr ); + for( size_t iDim = 0; iDim < params.nDim_field; iDim++ ) { + copy[2*iDim+0] = neighbor_[iDim][0] != MPI_PROC_NULL; + copy[2*iDim+1] = neighbor_[iDim][1] != MPI_PROC_NULL; + sendBuffer[2*iDim+0] = buffer.partSend[iDim][0]; + sendBuffer[2*iDim+1] = buffer.partSend[iDim][1]; } - - int n_part_send = cuParticles.size(); - - int iPart; - - // Define where particles are going - //Put particles in the send buffer it belongs to. Priority to lower dimensions. - if( params.geometry != "AMcylindrical" ) { - for( int i=0 ; iMPI_buffer_.part_index_send[idim][0].push_back( iPart ); - } - //If particle is outside of the global domain (has no neighbor), it will not be put in a send buffer and will simply be deleted. 
- check = 1; - } else if( cuParticles.position( idim, iPart ) >= max_local_[idim] ) { - if( neighbor_[idim][1]!=MPI_PROC_NULL ) { - vecSpecies[ispec]->MPI_buffer_.part_index_send[idim][1].push_back( iPart ); - } - check = 1; - } - idim++; - } - } - } else { //if (geometry == "AMcylindrical") - double r_min2, r_max2; - r_max2 = max_local_[1] * max_local_[1] ; - r_min2 = min_local_[1] * min_local_[1] ; - for( int i=0 ; iboundary_conditions_[0][0]!="periodic" ) ) { - continue; - } - vecSpecies[ispec]->MPI_buffer_.part_index_send[0][0].push_back( iPart ); - //MESSAGE("Sending particle to the left x= " << cuParticles.position(0,iPart) << " xmin = " << min_local_[0] ); - } - //If particle is outside of the global domain (has no neighbor), it will not be put in a send buffer and will simply be deleted. - } else if( cuParticles.position( 0, iPart ) >= max_local_[0] ) { - if ( (Pcoordinates[0]==params.number_of_patches[0]-1) && ( vecSpecies[ispec]->boundary_conditions_[0][1]!="periodic" ) ) { - continue; - } - if( neighbor_[0][1]!=MPI_PROC_NULL ) { - vecSpecies[ispec]->MPI_buffer_.part_index_send[0][1].push_back( iPart ); - // MESSAGE("Sending particle to the right x= " << cuParticles.position(0,iPart) << " xmax = " << max_local_[0] ); - } - } else if( cuParticles.distance2ToAxis( iPart ) < r_min2 ) { - if( neighbor_[1][0]!=MPI_PROC_NULL ) { - vecSpecies[ispec]->MPI_buffer_.part_index_send[1][0].push_back( iPart ); - //MESSAGE("Sending particle to the south r= " << cuParticles.distance2ToAxis(iPart) << " rmin2 = " << r_min2 ); - } - } else if( cuParticles.distance2ToAxis( iPart ) >= r_max2 ) { - if( neighbor_[1][1]!=MPI_PROC_NULL ) { - vecSpecies[ispec]->MPI_buffer_.part_index_send[1][1].push_back( iPart ); - //MESSAGE("Sending particle to the north r= " << cuParticles.distance2ToAxis(iPart) << " rmax2 = " << r_max2 << " rmin2= " << r_min2 ); - } - } - - } + if( params.geometry == "AMcylindrical" ) { + copy[0] = copy[0] && ( Pcoordinates[0]!=0 || vecSpecies[ispec]->boundary_conditions_[0][0]=="periodic" ); + copy[1] = copy[1] && ( Pcoordinates[0]!=params.number_of_patches[0]-1 || vecSpecies[ispec]->boundary_conditions_[0][1]=="periodic" ); } - -} // initExchParticles(... iDim) + + part.copyLeavingParticlesToBuffers( copy, sendBuffer ); + +} // copyExchParticlesToBuffers(... 
iDim) // --------------------------------------------------------------------------------------------------------------------- -// For direction iDim, start exchange of number of particles -// - vecPatch : used for intra-MPI process comm (direct copy using Particels::copyParticles) -// - smpi : inhereted from previous SmileiMPI::exchangeParticles() +// Exchange number of particles to exchange to establish or not a communication // --------------------------------------------------------------------------------------------------------------------- void Patch::exchNbrOfParticles( SmileiMPI *smpi, int ispec, Params &, int iDim, VectorPatch *vecPatch ) { - int h0 = ( *vecPatch )( 0 )->hindex; - /********************************************************************************/ - // Exchange number of particles to exchange to establish or not a communication - /********************************************************************************/ + SpeciesMPIbuffers &buffer = vecSpecies[ispec]->MPI_buffer_; + for( int iNeighbor=0 ; iNeighborsize(); + + // Send number of particles from neighbor if( neighbor_[iDim][iNeighbor]!=MPI_PROC_NULL ) { - vecSpecies[ispec]->MPI_buffer_.part_index_send_sz[iDim][iNeighbor] = ( vecSpecies[ispec]->MPI_buffer_.part_index_send[iDim][iNeighbor] ).size(); - if( is_a_MPI_neighbor( iDim, iNeighbor ) ) { - //If neighbour is MPI ==> I send him the number of particles I'll send later. int local_hindex = hindex - vecPatch->refHindex_; int tag = buildtag( local_hindex, iDim+1, iNeighbor+3 ); - MPI_Isend( &( vecSpecies[ispec]->MPI_buffer_.part_index_send_sz[iDim][iNeighbor] ), 1, MPI_INT, MPI_neighbor_[iDim][iNeighbor], tag, MPI_COMM_WORLD, &( vecSpecies[ispec]->MPI_buffer_.srequest[iDim][iNeighbor] ) ); + MPI_Isend( &buffer.partSendSize[iDim][iNeighbor], 1, MPI_INT, MPI_neighbor_[iDim][iNeighbor], tag, MPI_COMM_WORLD, &buffer.srequest[iDim][iNeighbor] ); } else { - //Else, I directly set the receive size to the correct value. - ( *vecPatch )( neighbor_[iDim][iNeighbor]- h0 )->vecSpecies[ispec]->MPI_buffer_.part_index_recv_sz[iDim][( iNeighbor+1 )%2] = vecSpecies[ispec]->MPI_buffer_.part_index_send_sz[iDim][iNeighbor]; + // If the destination is in the same MPI, directly set the number at destination + int destination_hindex = neighbor_[iDim][iNeighbor] - vecPatch->refHindex_; + SpeciesMPIbuffers &destination_buffer = ( *vecPatch )( destination_hindex )->vecSpecies[ispec]->MPI_buffer_; + destination_buffer.partRecvSize[iDim][iOppositeNeighbor] = buffer.partSendSize[iDim][iNeighbor]; } - } // END of Send - - if( neighbor_[iDim][( iNeighbor+1 )%2]!=MPI_PROC_NULL ) { - if( is_a_MPI_neighbor( iDim, ( iNeighbor+1 )%2 ) ) { - //If other neighbour is MPI ==> I receive the number of particles I'll receive later. 
- int local_hindex = neighbor_[iDim][( iNeighbor+1 )%2] - smpi->patch_refHindexes[ MPI_neighbor_[iDim][( iNeighbor+1 )%2] ]; + } + + // Receive number of particles from neighbor + if( neighbor_[iDim][iOppositeNeighbor]!=MPI_PROC_NULL ) { + if( is_a_MPI_neighbor( iDim, iOppositeNeighbor ) ) { + int local_hindex = neighbor_[iDim][iOppositeNeighbor] - smpi->patch_refHindexes[ MPI_neighbor_[iDim][iOppositeNeighbor] ]; int tag = buildtag( local_hindex, iDim+1, iNeighbor+3 ); - MPI_Irecv( &( vecSpecies[ispec]->MPI_buffer_.part_index_recv_sz[iDim][( iNeighbor+1 )%2] ), 1, MPI_INT, MPI_neighbor_[iDim][( iNeighbor+1 )%2], tag, MPI_COMM_WORLD, &( vecSpecies[ispec]->MPI_buffer_.rrequest[iDim][( iNeighbor+1 )%2] ) ); + MPI_Irecv( &buffer.partRecvSize[iDim][iOppositeNeighbor], 1, MPI_INT, MPI_neighbor_[iDim][iOppositeNeighbor], tag, MPI_COMM_WORLD, &buffer.rrequest[iDim][iOppositeNeighbor] ); } } - }//end loop on nb_neighbors. - + + } + } // exchNbrOfParticles(... iDim) +// --------------------------------------------------------------------------------------------------------------------- +// Wait for end of communications over number of particles +// --------------------------------------------------------------------------------------------------------------------- void Patch::endNbrOfParticles( int ispec, int iDim ) { - Particles &cuParticles = ( *vecSpecies[ispec]->particles_to_move ); - - /********************************************************************************/ - // Wait for end of communications over number of particles - /********************************************************************************/ + SpeciesMPIbuffers &buffer = vecSpecies[ispec]->MPI_buffer_; + for( int iNeighbor=0 ; iNeighborMPI_buffer_.srequest[iDim][iNeighbor] ), &( sstat[iNeighbor] ) ); - } + int iOppositeNeighbor = ( iNeighbor+1 )%2; + + MPI_Status sstat[2]; + MPI_Status rstat[2]; + if( is_a_MPI_neighbor( iDim, iNeighbor ) ) { + MPI_Wait( &( buffer.srequest[iDim][iNeighbor] ), &( sstat[iNeighbor] ) ); } - if( neighbor_[iDim][( iNeighbor+1 )%2]!=MPI_PROC_NULL ) { - if( is_a_MPI_neighbor( iDim, ( iNeighbor+1 )%2 ) ) { - MPI_Wait( &( vecSpecies[ispec]->MPI_buffer_.rrequest[iDim][( iNeighbor+1 )%2] ), &( rstat[( iNeighbor+1 )%2] ) ); - if( vecSpecies[ispec]->MPI_buffer_.part_index_recv_sz[iDim][( iNeighbor+1 )%2]!=0 ) { - //If I receive particles over MPI, I initialize my receive buffer with the appropriate size. - vecSpecies[ispec]->MPI_buffer_.partRecv[iDim][( iNeighbor+1 )%2].initialize( vecSpecies[ispec]->MPI_buffer_.part_index_recv_sz[iDim][( iNeighbor+1 )%2], cuParticles ); - } - } + if( is_a_MPI_neighbor( iDim, iOppositeNeighbor ) ) { + MPI_Wait( &( buffer.rrequest[iDim][iOppositeNeighbor] ), &( rstat[iOppositeNeighbor] ) ); } } - } // END endNbrOfParticles(... 
iDim) // --------------------------------------------------------------------------------------------------------------------- -// For direction iDim, finalize receive of number of particles and really send particles +// For direction iDim, prepare particles to be sent // - vecPatch : used for intra-MPI process comm (direct copy using Particels::copyParticles) // - smpi : used smpi->periods_ // --------------------------------------------------------------------------------------------------------------------- void Patch::prepareParticles( SmileiMPI *smpi, int ispec, Params ¶ms, int iDim, VectorPatch *vecPatch ) { - Particles &cuParticles = ( *vecSpecies[ispec]->particles_to_move ); - - int n_part_send; - int h0 = ( *vecPatch )( 0 )->hindex; double x_max = params.cell_length[iDim]*( params.global_size_[iDim] ); - + SpeciesMPIbuffers &buffer = vecSpecies[ispec]->MPI_buffer_; + for( int iNeighbor=0 ; iNeighborMPI_buffer_.part_index_send[iDim][iNeighbor] ).size(); - if( ( neighbor_[iDim][iNeighbor]!=MPI_PROC_NULL ) && ( n_part_send!=0 ) ) { - // Enabled periodicity - if( smpi->periods_[iDim]==1 ) { - for( int iPart=0 ; iPartMPI_buffer_.part_index_send[iDim][iNeighbor][iPart] ) < 0. ) ) { - cuParticles.position( iDim, vecSpecies[ispec]->MPI_buffer_.part_index_send[iDim][iNeighbor][iPart] ) += x_max; - } else if( ( iNeighbor==1 ) && ( Pcoordinates[iDim] == params.number_of_patches[iDim]-1 ) && ( cuParticles.position( iDim, vecSpecies[ispec]->MPI_buffer_.part_index_send[iDim][iNeighbor][iPart] ) >= x_max ) ) { - cuParticles.position( iDim, vecSpecies[ispec]->MPI_buffer_.part_index_send[iDim][iNeighbor][iPart] ) -= x_max; + + Particles &partSend = *buffer.partSend[iDim][iNeighbor]; + + // Enabled periodicity + if( neighbor_[iDim][iNeighbor] != MPI_PROC_NULL ) { + if( partSend.size() > 0 && smpi->periods_[iDim]==1 ) { + if( iNeighbor == 0 && Pcoordinates[iDim] == 0 ) { + for( size_t iPart=0; iPart < partSend.size(); iPart++ ) { + if( partSend.position( iDim, iPart ) < 0. ) { + partSend.position( iDim, iPart ) += x_max; + } + } + } + if( iNeighbor == 1 && Pcoordinates[iDim] == params.number_of_patches[iDim]-1 ) { + for( size_t iPart=0; iPart < partSend.size(); iPart++ ) { + if( partSend.position( iDim, iPart ) >= x_max ) { + partSend.position( iDim, iPart ) -= x_max; + } } } } - // Send particles + + // Initialize receive buffer with the appropriate size if( is_a_MPI_neighbor( iDim, iNeighbor ) ) { - // If MPI comm, first copy particles in the sendbuffer - for( int iPart=0 ; iPartMPI_buffer_.part_index_send[iDim][iNeighbor][iPart], vecSpecies[ispec]->MPI_buffer_.partSend[iDim][iNeighbor] ); + if( buffer.partRecvSize[iDim][iNeighbor]!=0 ) { + buffer.partRecv[iDim][iNeighbor]->initialize( buffer.partRecvSize[iDim][iNeighbor], *vecSpecies[ispec]->particles ); } + // Swap particles to other patch directly if it belongs to the same MPI } else { - //If not MPI comm, copy particles directly in the receive buffer - for( int iPart=0 ; iPartMPI_buffer_.part_index_send[iDim][iNeighbor][iPart], ( ( *vecPatch )( neighbor_[iDim][iNeighbor]- h0 )->vecSpecies[ispec]->MPI_buffer_.partRecv[iDim][( iNeighbor+1 )%2] ) ); - } + int iOppositeNeighbor = ( iNeighbor+1 )%2; + SpeciesMPIbuffers &neighbor_buffer = ( *vecPatch )( neighbor_[iDim][iNeighbor]- vecPatch->refHindex_ )->vecSpecies[ispec]->MPI_buffer_; + swap( buffer.partSend[iDim][iNeighbor], neighbor_buffer.partRecv[iDim][iOppositeNeighbor] ); } - } // END of Send - + } + } // END for iNeighbor } // END prepareParticles(... 
iDim) @@ -738,169 +673,133 @@ void Patch::prepareParticles( SmileiMPI *smpi, int ispec, Params ¶ms, int iD void Patch::exchParticles( SmileiMPI *smpi, int ispec, Params &, int iDim, VectorPatch *vecPatch ) { - int n_part_send, n_part_recv; - - for( int iNeighbor=0 ; iNeighborMPI_buffer_.part_index_send[iDim][iNeighbor] ).size(); - if( ( neighbor_[iDim][iNeighbor]!=MPI_PROC_NULL ) && ( n_part_send!=0 ) ) { - // Send particles - if( is_a_MPI_neighbor( iDim, iNeighbor ) ) { - // Then send particles - int local_hindex = hindex - vecPatch->refHindex_; - int tag = buildtag( local_hindex, iDim+1, iNeighbor+3 ); - vecSpecies[ispec]->typePartSend[( iDim*2 )+iNeighbor] = smpi->createMPIparticles( &( vecSpecies[ispec]->MPI_buffer_.partSend[iDim][iNeighbor] ) ); - MPI_Isend( &( ( vecSpecies[ispec]->MPI_buffer_.partSend[iDim][iNeighbor] ).position( 0, 0 ) ), 1, vecSpecies[ispec]->typePartSend[( iDim*2 )+iNeighbor], MPI_neighbor_[iDim][iNeighbor], tag, MPI_COMM_WORLD, &( vecSpecies[ispec]->MPI_buffer_.srequest[iDim][iNeighbor] ) ); - } - } // END of Send - - n_part_recv = vecSpecies[ispec]->MPI_buffer_.part_index_recv_sz[iDim][( iNeighbor+1 )%2]; - if( ( neighbor_[iDim][( iNeighbor+1 )%2]!=MPI_PROC_NULL ) && ( n_part_recv!=0 ) ) { - if( is_a_MPI_neighbor( iDim, ( iNeighbor+1 )%2 ) ) { - // If MPI comm, receive particles in the recv buffer previously initialized. - vecSpecies[ispec]->typePartRecv[( iDim*2 )+iNeighbor] = smpi->createMPIparticles( &( vecSpecies[ispec]->MPI_buffer_.partRecv[iDim][( iNeighbor+1 )%2] ) ); - int local_hindex = neighbor_[iDim][( iNeighbor+1 )%2] - smpi->patch_refHindexes[ MPI_neighbor_[iDim][( iNeighbor+1 )%2] ]; - int tag = buildtag( local_hindex, iDim+1, iNeighbor+3 ); - MPI_Irecv( &( ( vecSpecies[ispec]->MPI_buffer_.partRecv[iDim][( iNeighbor+1 )%2] ).position( 0, 0 ) ), 1, vecSpecies[ispec]->typePartRecv[( iDim*2 )+iNeighbor], MPI_neighbor_[iDim][( iNeighbor+1 )%2], tag, MPI_COMM_WORLD, &( vecSpecies[ispec]->MPI_buffer_.rrequest[iDim][( iNeighbor+1 )%2] ) ); - } - - } // END of Recv - - } // END for iNeighbor - + SpeciesMPIbuffers &buffer = vecSpecies[ispec]->MPI_buffer_; + + for( int iNeighbor=0; iNeighborrefHindex_; + int tag = buildtag( local_hindex, iDim+1, iNeighbor+3 ); + vecSpecies[ispec]->typePartSend[( iDim*2 )+iNeighbor] = smpi->createMPIparticles( &partSend ); + MPI_Isend( &partSend.position( 0, 0 ), 1, vecSpecies[ispec]->typePartSend[( iDim*2 )+iNeighbor], MPI_neighbor_[iDim][iNeighbor], tag, MPI_COMM_WORLD, &( buffer.srequest[iDim][iNeighbor] ) ); + } + + // Receive + int iOppositeNeighbor = ( iNeighbor+1 )%2; + Particles &partRecv = *buffer.partRecv[iDim][iOppositeNeighbor]; + if( partRecv.size() != 0 && is_a_MPI_neighbor( iDim, iOppositeNeighbor ) ) { + vecSpecies[ispec]->typePartRecv[( iDim*2 )+iNeighbor] = smpi->createMPIparticles( &partRecv ); + int local_hindex = neighbor_[iDim][iOppositeNeighbor] - smpi->patch_refHindexes[ MPI_neighbor_[iDim][iOppositeNeighbor] ]; + int tag = buildtag( local_hindex, iDim+1, iNeighbor+3 ); + MPI_Irecv( &partRecv.position( 0, 0 ), 1, vecSpecies[ispec]->typePartRecv[( iDim*2 )+iNeighbor], MPI_neighbor_[iDim][iOppositeNeighbor], tag, MPI_COMM_WORLD, &buffer.rrequest[iDim][iOppositeNeighbor] ); + } + + } + } // END exchParticles(... iDim) // --------------------------------------------------------------------------------------------------------------------- -// For direction iDim, finalize receive of particles, temporary store particles if diagonalParticles -// And store recv particles at their definitive place. 
-// Call Patch::cleanupSentParticles -// - vecPatch : used for intra-MPI process comm (direct copy using Particels::copyParticles) -// - smpi : used smpi->periods_ +// For direction iDim, wait receive of particles // --------------------------------------------------------------------------------------------------------------------- -void Patch::finalizeExchParticles( int ispec, int iDim ) +void Patch::waitExchParticles( int ispec, int iDim ) { - - int n_part_send, n_part_recv; - - /********************************************************************************/ - // Wait for end of communications over Particles - /********************************************************************************/ + SpeciesMPIbuffers &buffer = vecSpecies[ispec]->MPI_buffer_; + for( int iNeighbor=0 ; iNeighborMPI_buffer_.part_index_send[iDim][iNeighbor].size(); - n_part_recv = vecSpecies[ispec]->MPI_buffer_.part_index_recv_sz[iDim][( iNeighbor+1 )%2]; - - if( ( neighbor_[iDim][iNeighbor]!=MPI_PROC_NULL ) && ( n_part_send!=0 ) ) { - if( is_a_MPI_neighbor( iDim, iNeighbor ) ) { - MPI_Wait( &( vecSpecies[ispec]->MPI_buffer_.srequest[iDim][iNeighbor] ), &( sstat[iNeighbor] ) ); - MPI_Type_free( &( vecSpecies[ispec]->typePartSend[( iDim*2 )+iNeighbor] ) ); - } + + int iOppositeNeighbor = ( iNeighbor+1 )%2; + Particles &partSend = *buffer.partSend[iDim][iNeighbor]; + Particles &partRecv = *buffer.partRecv[iDim][iOppositeNeighbor]; + + if( partSend.size() != 0 && is_a_MPI_neighbor( iDim, iNeighbor ) ) { + MPI_Wait( &buffer.srequest[iDim][iNeighbor], &sstat[iNeighbor] ); + MPI_Type_free( &vecSpecies[ispec]->typePartSend[( iDim*2 )+iNeighbor] ); } - if( ( neighbor_[iDim][( iNeighbor+1 )%2]!=MPI_PROC_NULL ) && ( n_part_recv!=0 ) ) { - if( is_a_MPI_neighbor( iDim, ( iNeighbor+1 )%2 ) ) { - MPI_Wait( &( vecSpecies[ispec]->MPI_buffer_.rrequest[iDim][( iNeighbor+1 )%2] ), &( rstat[( iNeighbor+1 )%2] ) ); - MPI_Type_free( &( vecSpecies[ispec]->typePartRecv[( iDim*2 )+iNeighbor] ) ); - } + if( partRecv.size() != 0 && is_a_MPI_neighbor( iDim, iOppositeNeighbor ) ) { + MPI_Wait( &buffer.rrequest[iDim][iOppositeNeighbor], &rstat[iOppositeNeighbor] ); + MPI_Type_free( &vecSpecies[ispec]->typePartRecv[( iDim*2 )+iNeighbor] ); } } } void Patch::cornersParticles( int ispec, Params ¶ms, int iDim ) { - int ndim = params.nDim_field; - int idim, check; - - Particles &cuParticles = ( *vecSpecies[ispec]->particles_to_move ); - - int n_part_recv; - - /********************************************************************************/ - // Wait for end of communications over Particles - /********************************************************************************/ + SpeciesMPIbuffers &buffer = vecSpecies[ispec]->MPI_buffer_; + + // No need to treat diag particles at last dimension + if( iDim == ndim-1 ) { + return; + } + for( int iNeighbor=0 ; iNeighborMPI_buffer_.part_index_recv_sz[iDim][( iNeighbor+1 )%2]; - - if( ( neighbor_[iDim][( iNeighbor+1 )%2]!=MPI_PROC_NULL ) && ( n_part_recv!=0 ) ) { - - // Treat diagonalParticles - if( iDim < ndim-1 ) { // No need to treat diag particles at last dimension. - if( params.geometry != "AMcylindrical" ) { - for( int iPart=n_part_recv-1 ; iPart>=0; iPart-- ) { - check = 0; - idim = iDim+1;//We check next dimension - while( check == 0 && idimMPI_buffer_.partRecv[iDim][( iNeighbor+1 )%2] ).position( idim, iPart ) < min_local_[idim] ) { - if( neighbor_[idim][0]!=MPI_PROC_NULL ) { //if neighbour exists - //... copy it at the back of the local particle vector ... 
- ( vecSpecies[ispec]->MPI_buffer_.partRecv[iDim][( iNeighbor+1 )%2] ).copyParticle( iPart, cuParticles ); - //...adjust particles->last_index or cell_keys ... - //vecSpecies[ispec]->addSpaceForOneParticle(); - //... and add its index to the particles to be sent later... - vecSpecies[ispec]->MPI_buffer_.part_index_send[idim][0].push_back( cuParticles.size()-1 ); - } - //Remove it from receive buffer. - ( vecSpecies[ispec]->MPI_buffer_.partRecv[iDim][( iNeighbor+1 )%2] ).eraseParticle( iPart ); - vecSpecies[ispec]->MPI_buffer_.part_index_recv_sz[iDim][( iNeighbor+1 )%2]--; - check = 1; - } - //Other side of idim - else if( ( vecSpecies[ispec]->MPI_buffer_.partRecv[iDim][( iNeighbor+1 )%2] ).position( idim, iPart ) >= max_local_[idim] ) { - if( neighbor_[idim][1]!=MPI_PROC_NULL ) { //if neighbour exists - ( vecSpecies[ispec]->MPI_buffer_.partRecv[iDim][( iNeighbor+1 )%2] ).copyParticle( iPart, cuParticles ); - //...adjust particles->last_index or cell_keys ... - //vecSpecies[ispec]->addSpaceForOneParticle(); - vecSpecies[ispec]->MPI_buffer_.part_index_send[idim][1].push_back( cuParticles.size()-1 ); - } - ( vecSpecies[ispec]->MPI_buffer_.partRecv[iDim][( iNeighbor+1 )%2] ).eraseParticle( iPart ); - vecSpecies[ispec]->MPI_buffer_.part_index_recv_sz[iDim][( iNeighbor+1 )%2]--; - check = 1; - } - idim++; + + Particles &partRecv = *buffer.partRecv[iDim][iNeighbor]; + + vector> indices_corner_min( ndim-iDim-1 ); + vector> indices_corner_max( ndim-iDim-1 ); + vector indices_all_corners; + + if( neighbor_[iDim][iNeighbor] != MPI_PROC_NULL && partRecv.size() != 0 ) { + + // Find corner particles and store their indices + if( params.geometry != "AMcylindrical" ) { + + for( size_t iPart = 0; iPart < partRecv.size(); iPart++ ) { + for( size_t otherDim = iDim+1; otherDim < (size_t) ndim; otherDim++ ) { + if( partRecv.position( otherDim, iPart ) < min_local_[otherDim] ) { + indices_corner_min[otherDim-iDim-1].push_back( iPart ); + indices_all_corners.push_back( iPart ); + break; + } else if( partRecv.position( otherDim, iPart ) >= max_local_[otherDim] ) { + indices_corner_max[otherDim-iDim-1].push_back( iPart ); + indices_all_corners.push_back( iPart ); + break; } } - } else { //In AM geometry - //In this case, iDim = 0 and idim = iDim + 1 = 1. We only have to check potential comms along R. - double r_min2, r_max2; - r_min2 = min_local_[1]*min_local_[1]; - r_max2 = max_local_[1]*max_local_[1]; - for( int iPart=n_part_recv-1 ; iPart>=0; iPart-- ) { - //MESSAGE("test particle diag r2 = " << (vecSpecies[ispec]->MPI_buffer_.partRecv[0][(iNeighbor+1)%2]).distance2ToAxis(iPart) << "rmin2 = " << r_min2 << " rmax2 = " << r_max2 ); - if( ( vecSpecies[ispec]->MPI_buffer_.partRecv[0][( iNeighbor+1 )%2] ).distance2ToAxis( iPart ) < r_min2 ) { - if( neighbor_[1][0]!=MPI_PROC_NULL ) { //if neighbour exists - //... copy it at the back of the local particle vector ... - ( vecSpecies[ispec]->MPI_buffer_.partRecv[0][( iNeighbor+1 )%2] ).copyParticle( iPart, cuParticles ); - //...adjust particles->last_index or cell_keys ... - //vecSpecies[ispec]->addSpaceForOneParticle(); - //... and add its index to the particles to be sent later... - vecSpecies[ispec]->MPI_buffer_.part_index_send[1][0].push_back( cuParticles.size()-1 ); - //..without forgeting to add it to the list of particles to clean. - } - //Remove it from receive buffer. 
- ( vecSpecies[ispec]->MPI_buffer_.partRecv[0][( iNeighbor+1 )%2] ).eraseParticle( iPart ); - vecSpecies[ispec]->MPI_buffer_.part_index_recv_sz[0][( iNeighbor+1 )%2]--; - } - //Other side of idim - else if( ( vecSpecies[ispec]->MPI_buffer_.partRecv[0][( iNeighbor+1 )%2] ).distance2ToAxis( iPart ) >= r_max2 ) { - if( neighbor_[1][1]!=MPI_PROC_NULL ) { //if neighbour exists - //MESSAGE("particle diag +R"); - ( vecSpecies[ispec]->MPI_buffer_.partRecv[0][( iNeighbor+1 )%2] ).copyParticle( iPart, cuParticles ); - //...adjust particles->last_index or cell_keys ... - //vecSpecies[ispec]->addSpaceForOneParticle(); - vecSpecies[ispec]->MPI_buffer_.part_index_send[1][1].push_back( cuParticles.size()-1 ); - } - ( vecSpecies[ispec]->MPI_buffer_.partRecv[0][( iNeighbor+1 )%2] ).eraseParticle( iPart ); - vecSpecies[ispec]->MPI_buffer_.part_index_recv_sz[0][( iNeighbor+1 )%2]--; - } + } + + } else { //In AM geometry + + //In this case, iDim = 0 and idim = iDim + 1 = 1. We only have to check potential comms along R. + double r_min2 = min_local_[1]*min_local_[1]; + double r_max2 = max_local_[1]*max_local_[1]; + + for( size_t iPart = 0; iPart < partRecv.size(); iPart++ ) { + if( partRecv.distance2ToAxis( iPart ) < r_min2 ) { + indices_corner_min[0].push_back( iPart ); + indices_all_corners.push_back( iPart ); + break; + } else if( partRecv.distance2ToAxis( iPart ) >= r_max2 ) { + indices_corner_max[0].push_back( iPart ); + indices_all_corners.push_back( iPart ); + break; } } - }//If not last dim for diagonal particles. + + } + + // Copy corner particles to the end of the particles to be sent for the following dimension + for( size_t otherDim = iDim+1; otherDim < (size_t) ndim; otherDim++ ) { + if( indices_corner_min[otherDim-iDim-1].size() > 0 && neighbor_[otherDim][0] != MPI_PROC_NULL ) { + partRecv.copyParticles( indices_corner_min[otherDim-iDim-1], *buffer.partSend[otherDim][0], buffer.partSend[otherDim][0]->size() ); + } + if( indices_corner_max[otherDim-iDim-1].size() > 0 && neighbor_[otherDim][1] != MPI_PROC_NULL ) { + partRecv.copyParticles( indices_corner_max[otherDim-iDim-1], *buffer.partSend[otherDim][1], buffer.partSend[otherDim][1]->size() ); + } + } + + // Erase corner particles from the current recv array + if( indices_all_corners.size() > 0 ) { + partRecv.eraseParticles( indices_all_corners ); + } + } //If received something } //loop i Neighbor } @@ -925,22 +824,20 @@ void Patch::importAndSortParticles( int ispec, Params ¶ms ) void Patch::cleanParticlesOverhead( Params ¶ms ) { - int ndim = params.nDim_field; + for( unsigned int ispec=0 ; ispecparticles ); - - for( int idim = 0; idim < ndim; idim++ ) { + SpeciesMPIbuffers &buffer = vecSpecies[ispec]->MPI_buffer_; + + for( size_t idim = 0; idim < params.nDim_field; idim++ ) { for( int iNeighbor=0 ; iNeighborMPI_buffer_.partRecv[idim][iNeighbor].clear(); - vecSpecies[ispec]->MPI_buffer_.partRecv[idim][iNeighbor].shrinkToFit( ); - vecSpecies[ispec]->MPI_buffer_.partSend[idim][iNeighbor].clear(); - vecSpecies[ispec]->MPI_buffer_.partSend[idim][iNeighbor].shrinkToFit( ); - vecSpecies[ispec]->MPI_buffer_.part_index_send[idim][iNeighbor].clear(); - vector( vecSpecies[ispec]->MPI_buffer_.part_index_send[idim][iNeighbor] ).swap( vecSpecies[ispec]->MPI_buffer_.part_index_send[idim][iNeighbor] ); + buffer.partRecv[idim][iNeighbor]->clear(); + buffer.partRecv[idim][iNeighbor]->shrinkToFit( ); + buffer.partSend[idim][iNeighbor]->clear(); + buffer.partSend[idim][iNeighbor]->shrinkToFit( ); } } - - cuParticles.shrinkToFit( ); + + 
vecSpecies[ispec]->particles->shrinkToFit( ); } } @@ -1256,7 +1153,7 @@ void Patch::computePoynting() { } } -#ifdef SMILEI_ACCELERATOR_MODE +#ifdef SMILEI_ACCELERATOR_GPU // --------------------------------------------------------------------------------------------------------------------- // Allocate data on device @@ -1414,7 +1311,6 @@ void Patch::deleteFieldsOnDevice() // for( unsigned int ispec=0 ; ispec<( *this )( ipatch )->vecSpecies.size() ; ispec++ ) { // Species *spec = species( ipatch, ispec ); // spec->particles->initializeDataOnDevice(); -// spec->particles_to_move->initializeDataOnDevice(); // //#pragma acc enter data copyin(spec->nrj_radiation) // } diff --git a/src/Patch/Patch.h b/src/Patch/Patch.h index 6fc3f7578..8d06d21c2 100755 --- a/src/Patch/Patch.h +++ b/src/Patch/Patch.h @@ -174,7 +174,7 @@ class Patch //! Clean the MPI buffers for communications void cleanMPIBuffers( int ispec, Params ¶ms ); //! manage Idx of particles per direction, - void initExchParticles( int ispec, Params ¶ms ); + void copyExchParticlesToBuffers( int ispec, Params ¶ms ); //! init comm nbr of particles void exchNbrOfParticles( SmileiMPI *smpi, int ispec, Params ¶ms, int iDim, VectorPatch *vecPatch ); //! finalize comm / nbr of particles, init exch / particles @@ -184,7 +184,7 @@ class Patch //! effective exchange of particles void exchParticles( SmileiMPI *smpi, int ispec, Params ¶ms, int iDim, VectorPatch *vecPatch ); //! finalize exch / particles - void finalizeExchParticles( int ispec, int iDim ); + void waitExchParticles( int ispec, int iDim ); //! Treat diagonalParticles void cornersParticles( int ispec, Params ¶ms, int iDim ); //! inject particles received in main data structure and particles sorting @@ -194,7 +194,7 @@ class Patch //! delete Particles included in the index of particles to exchange. Assumes indexes are sorted. void cleanupSentParticles( int ispec, std::vector *indexes_of_particles_to_exchange ); -#ifdef SMILEI_ACCELERATOR_MODE +#ifdef SMILEI_ACCELERATOR_GPU //! 
Allocate and copy all the field grids on device void allocateAndCopyFieldsOnDevice(); diff --git a/src/Patch/SyncVectorPatch.cpp b/src/Patch/SyncVectorPatch.cpp index 09817b201..5e1c39694 100755 --- a/src/Patch/SyncVectorPatch.cpp +++ b/src/Patch/SyncVectorPatch.cpp @@ -2,7 +2,7 @@ #include "SyncVectorPatch.h" #include -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #include #endif #include "Params.h" @@ -24,52 +24,34 @@ template void SyncVectorPatch::exchangeAlongAllDirections,cField template void SyncVectorPatch::exchangeAlongAllDirectionsNoOMP( std::vector fields, VectorPatch &vecPatches, SmileiMPI *smpi ); template void SyncVectorPatch::exchangeAlongAllDirectionsNoOMP,cField>( std::vector fields, VectorPatch &vecPatches, SmileiMPI *smpi ); -void SyncVectorPatch::exchangeParticles( VectorPatch &vecPatches, int ispec, Params ¶ms, SmileiMPI *smpi ) +void SyncVectorPatch::initExchParticles( VectorPatch &vecPatches, int ispec, Params ¶ms, SmileiMPI *smpi ) { #pragma omp for schedule(runtime) for( unsigned int ipatch=0 ; ipatchextractParticles(); - vecPatches( ipatch )->initExchParticles( ispec, params ); - } - - // Init comm in direction 0 -#ifndef _NO_MPI_TM - #pragma omp for schedule(runtime) -#else - #pragma omp single -#endif - for( unsigned int ipatch=0 ; ipatchexchNbrOfParticles( smpi, ispec, params, 0, &vecPatches ); + vecPatches( ipatch )->copyExchParticlesToBuffers( ispec, params ); } + + // Start exchange along dimension 0 only + SyncVectorPatch::initExchParticlesAlongDimension( vecPatches, ispec, 0, params, smpi ); } // --------------------------------------------------------------------------------------------------------------------- //! This function performs: -//! - the exhcange of particles for each direction using the diagonal trick. +//! - the exchange of particles for each direction using the diagonal trick. //! - the importation of the new particles in the particle property arrays //! 
- the sorting of particles // --------------------------------------------------------------------------------------------------------------------- -void SyncVectorPatch::finalizeAndSortParticles( VectorPatch &vecPatches, int ispec, Params ¶ms, SmileiMPI *smpi ) +void SyncVectorPatch::finalizeExchParticlesAndSort( VectorPatch &vecPatches, int ispec, Params ¶ms, SmileiMPI *smpi ) { - SyncVectorPatch::finalizeExchangeParticles( vecPatches, ispec, 0, params, smpi ); - - // Per direction + // finish exchange along dimension 0 only + SyncVectorPatch::finalizeExchParticlesAlongDimension( vecPatches, ispec, 0, params, smpi ); + + // Other directions for( unsigned int iDim=1 ; iDimexchNbrOfParticles( smpi, ispec, params, iDim, &vecPatches ); - } - - SyncVectorPatch::finalizeExchangeParticles( vecPatches, ispec, iDim, params, smpi ); + SyncVectorPatch::initExchParticlesAlongDimension( vecPatches, ispec, iDim, params, smpi ); + SyncVectorPatch::finalizeExchParticlesAlongDimension( vecPatches, ispec, iDim, params, smpi ); } - + #pragma omp for schedule(runtime) for( unsigned int ipatch=0 ; ipatchimportAndSortParticles( ispec, params ); @@ -108,8 +90,20 @@ void SyncVectorPatch::finalizeAndSortParticles( VectorPatch &vecPatches, int isp } +void SyncVectorPatch::initExchParticlesAlongDimension( VectorPatch &vecPatches, int ispec, int iDim, Params ¶ms, SmileiMPI *smpi ) +{ + // Exchange numbers of particles in direction 0 only +#ifndef _NO_MPI_TM + #pragma omp for schedule(runtime) +#else + #pragma omp single +#endif + for( unsigned int ipatch=0 ; ipatchexchNbrOfParticles( smpi, ispec, params, iDim, &vecPatches ); + } +} -void SyncVectorPatch::finalizeExchangeParticles( VectorPatch &vecPatches, int ispec, int iDim, Params ¶ms, SmileiMPI *smpi ) +void SyncVectorPatch::finalizeExchParticlesAlongDimension( VectorPatch &vecPatches, int ispec, int iDim, Params ¶ms, SmileiMPI *smpi ) { #ifndef _NO_MPI_TM #pragma omp for schedule(runtime) @@ -140,7 +134,7 @@ void SyncVectorPatch::finalizeExchangeParticles( VectorPatch &vecPatches, int is #pragma omp single #endif for( unsigned int ipatch=0 ; ipatchfinalizeExchParticles( ispec, iDim ); + vecPatches( ipatch )->waitExchParticles( ispec, iDim ); } #pragma omp for schedule(runtime) @@ -275,7 +269,7 @@ void SyncVectorPatch::sumAllComponents( std::vector &fields, VectorPatc vecPatches.densitiesMPIx[ifield ]->extract_fields_sum( 0, iNeighbor, oversize[0] ); vecPatches.densitiesMPIx[ifield+nPatchMPIx ]->extract_fields_sum( 0, iNeighbor, oversize[0] ); vecPatches.densitiesMPIx[ifield+2*nPatchMPIx]->extract_fields_sum( 0, iNeighbor, oversize[0] ); -// #ifdef SMILEI_OPENACC_MODE +// #ifdef SMILEI_ACCELERATOR_GPU_OACC // Field* field = vecPatches.densitiesMPIx[ifield ]; // double* Jx = field->sendFields_[iNeighbor]->data_; // int sizeofJx = field->sendFields_[iNeighbor]->size(); @@ -297,7 +291,7 @@ void SyncVectorPatch::sumAllComponents( std::vector &fields, VectorPatc // iDim = 0, local const int nFieldLocalx = vecPatches.densitiesLocalx.size() / 3; -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) // At initialization, we may get a CPU buffer than needs to be handled on the host. 
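// A minimal sketch of the gating idiom used just below, assuming a device-mappable
// buffer `ptr` of length `n` and a second buffer `halo` (both names are placeholders):
//
//     const bool on_device = smilei::tools::gpu::HostDeviceMemoryManagement::IsHostPointerMappedOnDevice( ptr );
//     #pragma acc parallel if( on_device ) present( ptr[0:n], halo[0:n] )
//     #pragma acc loop gang worker vector
//     for( unsigned int i = 0; i < n; ++i ) {
//         ptr[i] += halo[i]; // stand-in for the actual ghost-cell summation
//     }
//
// With the OpenACC `if` clause, the same loop falls back to host execution whenever the
// buffer has not been mapped to the device yet, e.g. during initialization.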
const bool is_memory_on_device = vecPatches.densitiesLocalx.size() > 0 && smilei::tools::gpu::HostDeviceMemoryManagement::IsHostPointerMappedOnDevice( vecPatches.densitiesLocalx[0]->data() ); @@ -330,9 +324,9 @@ void SyncVectorPatch::sumAllComponents( std::vector &fields, VectorPatc pt2 = &( vecPatches.densitiesLocalx[ifield]->data_[0] ); //Sum 2 ==> 1 - const int last = gsp[0] * ny_ * nz_; + const unsigned int last = gsp[0] * ny_ * nz_; -#if defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) int ptsize = vecPatches.densitiesLocalx[ifield]->size(); int nspace0 = size[0]; #pragma acc parallel if ( is_memory_on_device) present(pt1[0-nspace0*ny_*nz_:ptsize],pt2[0:ptsize]) @@ -364,7 +358,7 @@ void SyncVectorPatch::sumAllComponents( std::vector &fields, VectorPatc vecPatches( ipatch )->finalizeSumField( vecPatches.densitiesMPIx[ifield+2*nPatchMPIx], 0 ); // Jz for (int iNeighbor=0 ; iNeighbor<2 ; iNeighbor++) { if ( vecPatches( ipatch )->is_a_MPI_neighbor( 0, ( iNeighbor+1 )%2 ) ) { -// #ifdef SMILEI_OPENACC_MODE +// #ifdef SMILEI_ACCELERATOR_GPU_OACC // Field* field = vecPatches.densitiesMPIx[ifield ]; // double* Jx = field->recvFields_[(iNeighbor+1)%2]->data_; // int sizeofJx = field->recvFields_[(iNeighbor+1)%2]->size(); @@ -408,7 +402,7 @@ void SyncVectorPatch::sumAllComponents( std::vector &fields, VectorPatc vecPatches.densitiesMPIy[ifield ]->extract_fields_sum( 1, iNeighbor, oversize[1] ); vecPatches.densitiesMPIy[ifield+nPatchMPIy ]->extract_fields_sum( 1, iNeighbor, oversize[1] ); vecPatches.densitiesMPIy[ifield+2*nPatchMPIy]->extract_fields_sum( 1, iNeighbor, oversize[1] ); -// #ifdef SMILEI_OPENACC_MODE +// #ifdef SMILEI_ACCELERATOR_GPU_OACC // Field* field = vecPatches.densitiesMPIy[ifield ]; // double* Jx = field->sendFields_[iNeighbor+2]->data_; // int sizeofJx = field->sendFields_[iNeighbor+2]->size(); @@ -430,7 +424,7 @@ void SyncVectorPatch::sumAllComponents( std::vector &fields, VectorPatc // iDim = 1, const int nFieldLocaly = vecPatches.densitiesLocaly.size() / 3; -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) const bool is_memory_on_device = vecPatches.densitiesLocaly.size() > 0 && smilei::tools::gpu::HostDeviceMemoryManagement::IsHostPointerMappedOnDevice( vecPatches.densitiesLocaly[0]->data() ); #endif @@ -463,11 +457,11 @@ void SyncVectorPatch::sumAllComponents( std::vector &fields, VectorPatc pt1 = &( fields[vecPatches( ipatch )->neighbor_[1][0]-h0+icomp*nPatches]->data_[size[1]*nz_] ); pt2 = &( vecPatches.densitiesLocaly[ifield]->data_[0] ); - const int outer_last = nx_ * ny_ * nz_; - const int outer_stride = ny_ * nz_; - const int inner_last = gsp[1] * nz_; + const unsigned int outer_last = nx_ * ny_ * nz_; + const unsigned int outer_stride = ny_ * nz_; + const unsigned int inner_last = gsp[1] * nz_; -#if defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) int ptsize = vecPatches.densitiesLocaly[ifield]->size(); int blabla = size[1]; #pragma acc parallel if (is_memory_on_device) present(pt1[0-blabla*nz_:ptsize],pt2[0:ptsize]) @@ -502,7 +496,7 @@ void SyncVectorPatch::sumAllComponents( std::vector &fields, VectorPatc vecPatches( ipatch )->finalizeSumField( vecPatches.densitiesMPIy[ifield+2*nPatchMPIy], 1 ); // Jz for (int iNeighbor=0 ; iNeighbor<2 ; iNeighbor++) { if ( vecPatches( ipatch )->is_a_MPI_neighbor( 1, ( iNeighbor+1 )%2 ) ) { -// #ifdef SMILEI_OPENACC_MODE +// #ifdef SMILEI_ACCELERATOR_GPU_OACC // Field* field = vecPatches.densitiesMPIy[ifield ]; // double* Jx = 
field->recvFields_[(iNeighbor+1)%2+2]->data_; // int sizeofJx = field->recvFields_[(iNeighbor+1)%2+2]->size(); @@ -544,7 +538,7 @@ void SyncVectorPatch::sumAllComponents( std::vector &fields, VectorPatc vecPatches.densitiesMPIz[ifield ]->extract_fields_sum( 2, iNeighbor, oversize[2] ); vecPatches.densitiesMPIz[ifield+nPatchMPIz ]->extract_fields_sum( 2, iNeighbor, oversize[2] ); vecPatches.densitiesMPIz[ifield+2*nPatchMPIz]->extract_fields_sum( 2, iNeighbor, oversize[2] ); -// #ifdef SMILEI_OPENACC_MODE +// #ifdef SMILEI_ACCELERATOR_GPU_OACC // Field* field = vecPatches.densitiesMPIz[ifield ]; // double* Jx = field->sendFields_[iNeighbor+4]->data_; // int sizeofJx = field->sendFields_[iNeighbor+4]->size(); @@ -566,7 +560,7 @@ void SyncVectorPatch::sumAllComponents( std::vector &fields, VectorPatc // iDim = 2 local const int nFieldLocalz = vecPatches.densitiesLocalz.size() / 3; -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) const bool is_memory_on_device = vecPatches.densitiesLocalz.size() > 0 && smilei::tools::gpu::HostDeviceMemoryManagement::IsHostPointerMappedOnDevice( vecPatches.densitiesLocalz[0]->data() ); #endif @@ -600,11 +594,11 @@ void SyncVectorPatch::sumAllComponents( std::vector &fields, VectorPatc pt1 = &( fields[vecPatches( ipatch )->neighbor_[2][0]-h0+icomp*nPatches]->data_[size[2]] ); pt2 = &( vecPatches.densitiesLocalz[ifield]->data_[0] ); - const int outer_last = nx_ * ny_ * nz_; - const int outer_stride = nz_; - const int inner_last = gsp[2]; + const unsigned int outer_last = nx_ * ny_ * nz_; + const unsigned int outer_stride = nz_; + const unsigned int inner_last = gsp[2]; -#if defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) int ptsize = vecPatches.densitiesLocalz[ifield]->size(); int blabla = size[2]; #pragma acc parallel if (is_memory_on_device) present(pt1[0-blabla:ptsize],pt2[0:ptsize]) @@ -636,7 +630,7 @@ void SyncVectorPatch::sumAllComponents( std::vector &fields, VectorPatc vecPatches( ipatch )->finalizeSumField( vecPatches.densitiesMPIz[ifield+2*nPatchMPIz], 2 ); // Jz for (int iNeighbor=0 ; iNeighbor<2 ; iNeighbor++) { if ( vecPatches( ipatch )->is_a_MPI_neighbor( 2, ( iNeighbor+1 )%2 ) ) { -// #ifdef SMILEI_OPENACC_MODE +// #ifdef SMILEI_ACCELERATOR_GPU_OACC // Field* field = vecPatches.densitiesMPIz[ifield ]; // double* Jx = field->recvFields_[(iNeighbor+1)%2+4]->data_; // int sizeofJx = field->recvFields_[(iNeighbor+1)%2+4]->size(); @@ -803,7 +797,7 @@ void SyncVectorPatch::exchangeE( Params &, VectorPatch &vecPatches, int imode, S SyncVectorPatch::finalizeExchangeAlongAllDirections( vecPatches.listEt_[imode], vecPatches ); } -void SyncVectorPatch::exchangeBmBTIS3( Params ¶ms, VectorPatch &vecPatches, int imode, SmileiMPI *smpi ) +void SyncVectorPatch::exchangeBmBTIS3( Params &/*params*/, VectorPatch &vecPatches, int imode, SmileiMPI *smpi ) { SyncVectorPatch::exchangeAlongAllDirections,cField>( vecPatches.listBr_mBTIS3[imode], vecPatches, smpi ); SyncVectorPatch::finalizeExchangeAlongAllDirections( vecPatches.listBr_mBTIS3[imode], vecPatches ); @@ -887,7 +881,7 @@ void SyncVectorPatch::exchangeEnvEx( Params ¶ms, VectorPatch &vecPatches, Sm } } -void SyncVectorPatch::exchangeBmBTIS3( Params ¶ms, VectorPatch &vecPatches, SmileiMPI *smpi ) +void SyncVectorPatch::exchangeBmBTIS3( Params &/*params*/, VectorPatch &vecPatches, SmileiMPI *smpi ) { // exchange BmBTIS3 in Cartesian geometries // exchange ByBTIS3 @@ -1493,7 +1487,7 @@ void SyncVectorPatch::exchangeAllComponentsAlongX( std::vector &fields, 
vecPatches.B_MPIx[ifield ]->extract_fields_exch( 0, iNeighbor, oversize ); vecPatches.B_MPIx[ifield+nMPIx]->create_sub_fields ( 0, iNeighbor, oversize ); vecPatches.B_MPIx[ifield+nMPIx]->extract_fields_exch( 0, iNeighbor, oversize ); -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC Field* field = vecPatches.B_MPIx[ifield ]; double* By = field->sendFields_[iNeighbor]->data_; int sizeofBy = field->sendFields_[iNeighbor]->size(); @@ -1586,7 +1580,7 @@ void SyncVectorPatch::finalizeExchangeAllComponentsAlongX( VectorPatch &vecPatch vecPatches( ipatch )->finalizeExchange( vecPatches.B_MPIx[ifield+nMPIx], 0 ); // Bz for (int iNeighbor=0 ; iNeighbor<2 ; iNeighbor++) { if ( vecPatches( ipatch )->is_a_MPI_neighbor( 0, ( iNeighbor+1 )%2 ) ) { -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC Field* field = vecPatches.B_MPIx[ifield ]; double* By = field->recvFields_[(iNeighbor+1)%2]->data_; int sizeofBy = field->recvFields_[(iNeighbor+1)%2]->size(); @@ -1629,7 +1623,7 @@ void SyncVectorPatch::exchangeAllComponentsAlongY( std::vector &fields, vecPatches.B1_MPIy[ifield ]->extract_fields_exch( 1, iNeighbor, oversize ); vecPatches.B1_MPIy[ifield+nMPIy]->create_sub_fields ( 1, iNeighbor, oversize ); vecPatches.B1_MPIy[ifield+nMPIy]->extract_fields_exch( 1, iNeighbor, oversize ); -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC Field* field = vecPatches.B1_MPIy[ifield ]; double* Bx = field->sendFields_[iNeighbor+2]->data_; int sizeofBx = field->sendFields_[iNeighbor+2]->size(); @@ -1677,7 +1671,7 @@ void SyncVectorPatch::exchangeAllComponentsAlongY( std::vector &fields, if( vecPatches( ipatch )->MPI_me_ == vecPatches( ipatch )->MPI_neighbor_[1][0] ) { pt1 = &( fields[vecPatches( ipatch )->neighbor_[1][0]-h0+icomp*nPatches]->data_[size*nz_] ); pt2 = &( vecPatches.B1_localy[ifield]->data_[0] ); -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC int ptsize = vecPatches.B1_localy[ifield]->size(); #pragma acc parallel present(pt1[0-size*nz_:ptsize],pt2[0:ptsize]) #pragma acc loop gang worker vector @@ -1717,7 +1711,7 @@ void SyncVectorPatch::finalizeExchangeAllComponentsAlongY( VectorPatch &vecPatch vecPatches( ipatch )->finalizeExchange( vecPatches.B1_MPIy[ifield+nMPIy], 1 ); // Bz for (int iNeighbor=0 ; iNeighbor<2 ; iNeighbor++) { if ( vecPatches( ipatch )->is_a_MPI_neighbor( 1, ( iNeighbor+1 )%2 ) ) { -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC Field* field = vecPatches.B1_MPIy[ifield ]; double* Bx = field->recvFields_[(iNeighbor+1)%2+2]->data_; int sizeofBx = field->recvFields_[(iNeighbor+1)%2+2]->size(); @@ -1760,7 +1754,7 @@ void SyncVectorPatch::exchangeAllComponentsAlongZ( std::vector fields, vecPatches.B2_MPIz[ifield ]->extract_fields_exch( 2, iNeighbor, oversize ); vecPatches.B2_MPIz[ifield+nMPIz]->create_sub_fields ( 2, iNeighbor, oversize ); vecPatches.B2_MPIz[ifield+nMPIz]->extract_fields_exch( 2, iNeighbor, oversize ); -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC Field* field = vecPatches.B2_MPIz[ifield ]; double* Bx = field->sendFields_[iNeighbor+4]->data_; int sizeofBx = field->sendFields_[iNeighbor+4]->size(); @@ -1805,7 +1799,7 @@ void SyncVectorPatch::exchangeAllComponentsAlongZ( std::vector fields, if( vecPatches( ipatch )->MPI_me_ == vecPatches( ipatch )->MPI_neighbor_[2][0] ) { pt1 = &( fields[vecPatches( ipatch )->neighbor_[2][0]-h0+icomp*nPatches]->data_[size] ); pt2 = &( vecPatches.B2_localz[ifield]->data_[0] ); -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC int ptsize = 
vecPatches.B2_localz[ifield]->size(); #pragma acc parallel present(pt1[0-size:ptsize],pt2[0:ptsize]) #pragma acc loop gang worker vector @@ -1845,7 +1839,7 @@ void SyncVectorPatch::finalizeExchangeAllComponentsAlongZ( VectorPatch &vecPatch vecPatches( ipatch )->finalizeExchange( vecPatches.B2_MPIz[ifield+nMPIz], 2 ); // By for (int iNeighbor=0 ; iNeighbor<2 ; iNeighbor++) { if ( vecPatches( ipatch )->is_a_MPI_neighbor( 2, ( iNeighbor+1 )%2 ) ) { -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC Field* field = vecPatches.B2_MPIz[ifield ]; double* Bx = field->recvFields_[(iNeighbor+1)%2+4]->data_; int sizeofBx = field->recvFields_[(iNeighbor+1)%2+4]->size(); diff --git a/src/Patch/SyncVectorPatch.h b/src/Patch/SyncVectorPatch.h index 0ce868cae..07435cd49 100755 --- a/src/Patch/SyncVectorPatch.h +++ b/src/Patch/SyncVectorPatch.h @@ -17,9 +17,10 @@ class SyncVectorPatch public : //! Particles synchronization - static void exchangeParticles( VectorPatch &vecPatches, int ispec, Params ¶ms, SmileiMPI *smpi ); - static void finalizeAndSortParticles( VectorPatch &vecPatches, int ispec, Params ¶ms, SmileiMPI *smpi ); - static void finalizeExchangeParticles( VectorPatch &vecPatches, int ispec, int iDim, Params ¶ms, SmileiMPI *smpi ); + static void initExchParticles( VectorPatch &vecPatches, int ispec, Params ¶ms, SmileiMPI *smpi ); + static void finalizeExchParticlesAndSort( VectorPatch &vecPatches, int ispec, Params ¶ms, SmileiMPI *smpi ); + static void initExchParticlesAlongDimension( VectorPatch &vecPatches, int ispec, int iDim, Params ¶ms, SmileiMPI *smpi ); + static void finalizeExchParticlesAlongDimension( VectorPatch &vecPatches, int ispec, int iDim, Params ¶ms, SmileiMPI *smpi ); //! Densities synchronization static void sumRhoJ( Params ¶ms, VectorPatch &vecPatches, SmileiMPI *smpi ); @@ -72,7 +73,7 @@ public : if ( vecPatches( ipatch )->is_a_MPI_neighbor( 0, iNeighbor ) ) { fields[ifield]->create_sub_fields ( 0, iNeighbor, 2*oversize[0]+1+fields[ifield]->isDual_[0] ); fields[ifield]->extract_fields_sum( 0, iNeighbor, oversize[0] ); -// #ifdef SMILEI_OPENACC_MODE +// #ifdef SMILEI_ACCELERATOR_GPU_OACC // double * pointer = fields[ifield]->sendFields_[iNeighbor]->data_; // int size = fields[ifield]->size(); // #endif @@ -86,7 +87,7 @@ public : // iDim = 0, local -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) // At initialization, we may get a CPU buffer than needs to be handled on the host. 
const bool is_memory_on_device = fields.size() > 0 && smilei::tools::gpu::HostDeviceMemoryManagement::IsHostPointerMappedOnDevice( fields[0]->data() ); @@ -122,7 +123,7 @@ public : const unsigned int last = gsp[0] * ny_ * nz_; -#if defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) int ptsize = fields[ifield]->size(); int nspace0 = size[0]; #pragma acc parallel if ( is_memory_on_device) present(pt1[0-nspace0*ny_*nz_:ptsize],pt2[0:ptsize]) @@ -176,7 +177,7 @@ public : if ( vecPatches( ipatch )->is_a_MPI_neighbor( 1, iNeighbor ) ) { fields[ifield]->create_sub_fields ( 1, iNeighbor, 2*oversize[1]+1+fields[ifield]->isDual_[1] ); fields[ifield]->extract_fields_sum( 1, iNeighbor, oversize[1] ); -// #ifdef SMILEI_OPENACC_MODE +// #ifdef SMILEI_ACCELERATOR_GPU_OACC // double* pointer = fields[ifield]->recvFields_[(iNeighbor+1)%2]->data_; // int size = fields[ifield]->recvFields_[(iNeighbor+1)%2]->size(); // //#pragma acc update device( Jx[0:sizeofJx], Jy[0:sizeofJy], Jz[0:sizeofJz] ) @@ -191,7 +192,7 @@ public : // iDim = 1, local -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) const bool is_memory_on_device = fields.size() > 0 && smilei::tools::gpu::HostDeviceMemoryManagement::IsHostPointerMappedOnDevice( fields[0]->data() ); #endif @@ -219,11 +220,11 @@ public : pt1 = &( *field1 )( size[1]*nz_ ); pt2 = &( *field2 )( 0 ); - const int outer_last = nx_ * ny_ * nz_; - const int outer_stride = ny_ * nz_; - const int inner_last = gsp[1] * nz_; + const unsigned int outer_last = nx_ * ny_ * nz_; + const unsigned int outer_stride = ny_ * nz_; + const unsigned int inner_last = gsp[1] * nz_; -#if defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) int ptsize = fields[ifield]->size(); int blabla = size[1]; #pragma acc parallel if (is_memory_on_device) present(pt1[0-blabla*nz_:ptsize],pt2[0:ptsize]) @@ -281,7 +282,7 @@ public : if ( vecPatches( ipatch )->is_a_MPI_neighbor( 2, iNeighbor ) ) { fields[ifield]->create_sub_fields ( 2, iNeighbor, 2*oversize[2]+1+fields[ifield]->isDual_[2] ); fields[ifield]->extract_fields_sum( 2, iNeighbor, oversize[2] ); -// #ifdef SMILEI_OPENACC_MODE +// #ifdef SMILEI_ACCELERATOR_GPU_OACC // double* pointer = fields[ifield]->recvFields_[(iNeighbor+1)%2+2]->data_; // int size = fields[ifield]->recvFields_[(iNeighbor+1)%2+2]->size(); // #endif @@ -292,7 +293,7 @@ public : // iDim = 2 local -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) const bool is_memory_on_device = fields.size() > 0 && smilei::tools::gpu::HostDeviceMemoryManagement::IsHostPointerMappedOnDevice( fields[0]->data() ); #endif @@ -320,11 +321,11 @@ public : pt1 = &( *field1 )( size[2] ); pt2 = &( *field2 )( 0 ); - const int outer_last = nx_ * ny_ * nz_; - const int outer_stride = nz_; - const int inner_last = gsp[2]; + const unsigned int outer_last = nx_ * ny_ * nz_; + const unsigned int outer_stride = nz_; + const unsigned int inner_last = gsp[2]; -#if defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) int ptsize = fields[ifield]->size(); int blabla = size[2]; #pragma acc parallel if (is_memory_on_device) present(pt1[0-blabla:ptsize],pt2[0:ptsize]) diff --git a/src/Patch/VectorPatch.cpp b/src/Patch/VectorPatch.cpp index 8b239b905..42f4dd3d8 100755 --- a/src/Patch/VectorPatch.cpp +++ b/src/Patch/VectorPatch.cpp @@ -301,7 +301,7 @@ void VectorPatch::reconfiguration( Params ¶ms, Timers &timers, int itime ) // 
--------------------------------------------------------------------------------------------------------------------- void VectorPatch::initialParticleSorting( Params ¶ms ) { -#if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined( SMILEI_OPENACC_MODE) +#if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined( SMILEI_ACCELERATOR_GPU_OACC) // Initially I wanted to control the GPU particle sorting/bin initialization // here. In the end it was put in initializeDataOnDevice which is more // meaningful. @@ -322,7 +322,7 @@ void VectorPatch::initialParticleSorting( Params ¶ms ) } // --------------------------------------------------------------------------------------------------------------------- -// For all patches, move particles (restartRhoJ(s), dynamics and exchangeParticles) +// For all patches, move particles (restartRhoJ(s), dynamics and initExchParticles) // --------------------------------------------------------------------------------------------------------------------- void VectorPatch::dynamics( Params ¶ms, SmileiMPI *smpi, @@ -402,7 +402,7 @@ void VectorPatch::dynamics( Params ¶ms, for( unsigned int ispec=0 ; ispec<( *this )( 0 )->vecSpecies.size(); ispec++ ) { Species *spec = species( 0, ispec ); if ( (!params.Laser_Envelope_model) && (spec->isProj( time_dual, simWindow )) ){ - SyncVectorPatch::exchangeParticles( ( *this ), ispec, params, smpi ); // Included sortParticles + SyncVectorPatch::initExchParticles( ( *this ), ispec, params, smpi ); // Included sortParticles } // end condition on Species and on envelope model } // end loop on species //MESSAGE("exchange particles"); @@ -460,7 +460,7 @@ void VectorPatch::projectionForDiags( Params ¶ms, // --------------------------------------------------------------------------------------------------------------------- //! For all patches, exchange particles and sort them. // --------------------------------------------------------------------------------------------------------------------- -void VectorPatch::finalizeAndSortParticles( Params ¶ms, SmileiMPI *smpi, SimWindow *simWindow, +void VectorPatch::finalizeExchParticlesAndSort( Params ¶ms, SmileiMPI *smpi, SimWindow *simWindow, double time_dual, Timers &timers, int itime ) { timers.syncPart.restart(); @@ -471,7 +471,7 @@ void VectorPatch::finalizeAndSortParticles( Params ¶ms, SmileiMPI *smpi, Sim for( unsigned int ispec=0 ; ispec<( *this )( 0 )->vecSpecies.size(); ispec++ ) { if( ( *this )( 0 )->vecSpecies[ispec]->isProj( time_dual, simWindow ) ) { - SyncVectorPatch::finalizeAndSortParticles( ( *this ), ispec, params, smpi ); // Included sortParticles + SyncVectorPatch::finalizeExchParticlesAndSort( ( *this ), ispec, params, smpi ); // Included sortParticles } } @@ -491,7 +491,7 @@ void VectorPatch::finalizeAndSortParticles( Params ¶ms, SmileiMPI *smpi, Sim timers.syncPart.update( params.printNow( itime ) ); -} // END finalizeAndSortParticles +} // END finalizeExchParticlesAndSort //! 
Perform the particles merging on all patches @@ -853,7 +853,7 @@ void VectorPatch::sumDensities( Params ¶ms, double time_dual, Timers &timers #pragma omp for schedule(static) for( unsigned int ipatch=0 ; ipatchsize() ; ipatch++ ) { // Per species in global, Attention if output -> Sync / per species fields -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) // At itime == 0, data is still located on the Host if (itime == 0) { ( *this )( ipatch )->EMfields->computeTotalRhoJ(); @@ -1269,7 +1269,7 @@ void VectorPatch::closeAllDiags( SmileiMPI *smpi ) // --------------------------------------------------------------------------------------------------------------------- void VectorPatch::runAllDiags( Params &/*params*/, SmileiMPI *smpi, unsigned int itime, Timers &timers, SimWindow *simWindow ) { -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) bool data_on_cpu_updated = false; #endif @@ -1277,7 +1277,7 @@ void VectorPatch::runAllDiags( Params &/*params*/, SmileiMPI *smpi, unsigned int timers.diags.restart(); // Determine which data is required from the device -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) bool need_particles = false; bool need_fields = false; @@ -1346,7 +1346,7 @@ void VectorPatch::runAllDiags( Params &/*params*/, SmileiMPI *smpi, unsigned int for( unsigned int idiag = 0 ; idiag < globalDiags.size() ; idiag++ ) { diag_timers_[idiag]->restart(); -// #if defined( SMILEI_ACCELERATOR_MODE) +// #if defined( SMILEI_ACCELERATOR_GPU) // if( globalDiags[idiag]->timeSelection->theTimeIsNow( itime ) && // !data_on_cpu_updated && // ( itime > 0 ) ) { @@ -1462,7 +1462,7 @@ void VectorPatch::runAllDiags( Params &/*params*/, SmileiMPI *smpi, unsigned int for( unsigned int idiag = 0 ; idiag < localDiags.size() ; idiag++ ) { diag_timers_[globalDiags.size()+idiag]->restart(); -// #if defined( SMILEI_ACCELERATOR_MODE ) +// #if defined( SMILEI_ACCELERATOR_GPU ) // if( localDiags[idiag]->timeSelection->theTimeIsNow( itime ) && // !data_on_cpu_updated && // ( itime > 0 ) ) { @@ -1496,7 +1496,7 @@ void VectorPatch::runAllDiags( Params &/*params*/, SmileiMPI *smpi, unsigned int for( unsigned int ipatch=0 ; ipatchEMfields->restartRhoJs(); -#if defined (SMILEI_ACCELERATOR_MODE) +#if defined (SMILEI_ACCELERATOR_GPU) // Delete species current and rho grids from device for( unsigned int ispec = 0; ispec < ( *this )( ipatch )->vecSpecies.size(); ispec++ ) { ( *this )( ipatch )->vecSpecies[ispec]->Species::deleteSpeciesCurrentAndChargeOnDevice(ispec, ( *this )( ipatch )->EMfields); @@ -2973,7 +2973,7 @@ void VectorPatch::createPatches( Params ¶ms, SmileiMPI *smpi, SimWindow *sim // Set Index of the 1st patch of the vector yet on current MPI rank // Is this really necessary ? It should be done already ... 
- refHindex_ = ( *this )( 0 )->Hindex(); + setRefHindex(); // Current number of patch int nPatches_now = this->size() ; @@ -4402,7 +4402,7 @@ void VectorPatch::moveWindow( // Bring all particles and field grids to the Host (except species grids) // This part can be optimized by copying only the patch to be destructed -#if defined( SMILEI_ACCELERATOR_MODE) +#if defined( SMILEI_ACCELERATOR_GPU) if( simWindow->isMoving( time_dual ) || itime == simWindow->getAdditionalShiftsIteration() ) { copyParticlesFromDeviceToHost(); copyFieldsFromDeviceToHost(); @@ -4412,10 +4412,11 @@ void VectorPatch::moveWindow( simWindow->shift( (*this), smpi, params, itime, time_dual, region ); - if (itime == simWindow->getAdditionalShiftsIteration() ) { + if( itime == (int) simWindow->getAdditionalShiftsIteration() ) { int adjust = simWindow->isMoving(time_dual)?0:1; - for (unsigned int n=0;n < simWindow->getNumberOfAdditionalShifts()-adjust; n++) + for( unsigned int n=0; n < simWindow->getNumberOfAdditionalShifts()-adjust; n++ ) { simWindow->shift( (*this), smpi, params, itime, time_dual, region ); + } } // Copy all Fields and Particles to the device @@ -4423,7 +4424,7 @@ void VectorPatch::moveWindow( // let's try initialising like we do at the start: -/*#if defined( SMILEI_ACCELERATOR_MODE ) +/*#if defined( SMILEI_ACCELERATOR_GPU ) // Allocate particle and field arrays // Also copy particle array content on device vecPatches.allocateDataOnDevice( params, &smpi, @@ -4434,7 +4435,7 @@ void VectorPatch::moveWindow( #endif*/ // does not do anything? - /*#if defined( SMILEI_ACCELERATOR_MODE) + /*#if defined( SMILEI_ACCELERATOR_GPU) if( simWindow->isMoving( time_dual ) || itime == simWindow->getAdditionalShiftsIteration() ) { copyFieldsFromHostToDevice(); copyParticlesFromHostToDevice(); @@ -4588,7 +4589,7 @@ void VectorPatch::ponderomotiveUpdatePositionAndCurrents( Params ¶ms, timers.syncPart.restart(); for( unsigned int ispec=0 ; ispec<( *this )( 0 )->vecSpecies.size(); ispec++ ) { if( ( *this )( 0 )->vecSpecies[ispec]->isProj( time_dual, simWindow ) ) { - SyncVectorPatch::exchangeParticles( ( *this ), ispec, params, smpi ); // Included sortParticles + SyncVectorPatch::initExchParticles( ( *this ), ispec, params, smpi ); // Included sortParticles } // end condition on species } // end loop on species timers.syncPart.update( params.printNow( itime ) ); @@ -4609,91 +4610,26 @@ void VectorPatch::initNewEnvelope( Params & ) } // END initNewEnvelope +#if defined( SMILEI_ACCELERATOR_GPU ) void VectorPatch::allocateDataOnDevice(Params ¶ms, SmileiMPI *smpi, RadiationTables *radiation_tables, MultiphotonBreitWheelerTables *multiphoton_Breit_Wheeler_tables) { - -#if defined( SMILEI_ACCELERATOR_MODE ) // TODO(Etienne M): FREE. If we have load balancing or other patch // creation/destruction available (which is not the case on GPU ATM), // we should be taking care of freeing this GPU memory. 
- const int npatches = this->size(); - - // const int sizeofJx = patches_[0]->EMfields->Jx_->size(); - // const int sizeofJy = patches_[0]->EMfields->Jy_->size(); - // const int sizeofJz = patches_[0]->EMfields->Jz_->size(); - // const int sizeofRho = patches_[0]->EMfields->rho_->size(); - - // const int sizeofEx = patches_[0]->EMfields->Ex_->size(); - // const int sizeofEy = patches_[0]->EMfields->Ey_->size(); - // const int sizeofEz = patches_[0]->EMfields->Ez_->size(); - - // const int sizeofBx = patches_[0]->EMfields->Bx_->size(); - // const int sizeofBy = patches_[0]->EMfields->By_->size(); - // const int sizeofBz = patches_[0]->EMfields->Bz_->size(); - - for( int ipatch=0 ; ipatchvecSpecies.size(); ispec++ ) { - Species *spec = species( ipatch, ispec ); - spec->particles->initializeDataOnDevice(); - spec->particles_to_move->initializeDataOnDevice(); - - // Create photon species on the device - if ( spec->radiation_model_ == "mc" && spec->photon_species_) { - spec->radiated_photons_->initializeDataOnDevice(); - } - - // Create pair species on the device - if ( spec->mBW_pair_species_[0] && spec->mBW_pair_species_[1]) { - spec->mBW_pair_particles_[0]->initializeDataOnDevice(); - spec->mBW_pair_particles_[1]->initializeDataOnDevice(); - } - - //#pragma acc enter data copyin(spec->nrj_radiation) + for( auto spec: patch->vecSpecies ) { + spec->allocateParticlesOnDevice(); } // Allocate field data structures on GPU - patches_[ipatch]->allocateFieldsOnDevice(); - - // const double *const Jx = patches_[ipatch]->EMfields->Jx_->data(); - // const double *const Jy = patches_[ipatch]->EMfields->Jy_->data(); - // const double *const Jz = patches_[ipatch]->EMfields->Jz_->data(); - // const double *const Rho = patches_[ipatch]->EMfields->rho_->data(); - - // smilei::tools::gpu::HostDeviceMemoryManagement::DeviceAllocate( Jx, sizeofJx ); - // smilei::tools::gpu::HostDeviceMemoryManagement::DeviceAllocate( Jy, sizeofJy ); - // smilei::tools::gpu::HostDeviceMemoryManagement::DeviceAllocate( Jz, sizeofJz ); - // smilei::tools::gpu::HostDeviceMemoryManagement::DeviceAllocate( Rho, sizeofRho ); - - // const double *const Ex = patches_[ipatch]->EMfields->Ex_->data(); - // const double *const Ey = patches_[ipatch]->EMfields->Ey_->data(); - // const double *const Ez = patches_[ipatch]->EMfields->Ez_->data(); + patch->allocateFieldsOnDevice(); - // smilei::tools::gpu::HostDeviceMemoryManagement::DeviceAllocate( Ex, sizeofEx ); - // smilei::tools::gpu::HostDeviceMemoryManagement::DeviceAllocate( Ey, sizeofEy ); - // smilei::tools::gpu::HostDeviceMemoryManagement::DeviceAllocate( Ez, sizeofEz ); - - // const double *const Bmx = patches_[ipatch]->EMfields->Bx_m->data(); - // const double *const Bmy = patches_[ipatch]->EMfields->By_m->data(); - // const double *const Bmz = patches_[ipatch]->EMfields->Bz_m->data(); - - // smilei::tools::gpu::HostDeviceMemoryManagement::DeviceAllocate( Bmx, sizeofBx ); - // smilei::tools::gpu::HostDeviceMemoryManagement::DeviceAllocate( Bmy, sizeofBy ); - // smilei::tools::gpu::HostDeviceMemoryManagement::DeviceAllocate( Bmz, sizeofBz ); - - // const double *const Bx = patches_[ipatch]->EMfields->Bx_->data(); - // const double *const By = patches_[ipatch]->EMfields->By_->data(); - // const double *const Bz = patches_[ipatch]->EMfields->Bz_->data(); - - // smilei::tools::gpu::HostDeviceMemoryManagement::DeviceAllocateAndCopyHostToDevice( Bx, sizeofBx ); - // smilei::tools::gpu::HostDeviceMemoryManagement::DeviceAllocateAndCopyHostToDevice( By, sizeofBy ); - // 
smilei::tools::gpu::HostDeviceMemoryManagement::DeviceAllocateAndCopyHostToDevice( Bz, sizeofBz ); - } // end patch loop // TODO(Etienne M): We should create a function that does the copy of the radiation table. @@ -4745,17 +4681,24 @@ void VectorPatch::allocateDataOnDevice(Params ¶ms, smilei::tools::gpu::HostDeviceMemoryManagement::DeviceAllocateAndCopyHostToDevice( min_particle_chi_table, min_particle_chi_size ); smilei::tools::gpu::HostDeviceMemoryManagement::DeviceAllocateAndCopyHostToDevice( xi_table, xi_table_size ); } +} #else +void VectorPatch::allocateDataOnDevice(Params &, + SmileiMPI *, + RadiationTables *, + MultiphotonBreitWheelerTables *) +{ ERROR( "GPU related code should not be reached in CPU mode!" ); -#endif } +#endif + //! Clean data allocated on device +#if defined( SMILEI_ACCELERATOR_GPU ) void VectorPatch::cleanDataOnDevice( Params ¶ms, SmileiMPI *smpi, RadiationTables *radiation_tables, MultiphotonBreitWheelerTables *multiphoton_Breit_Wheeler_tables) { -#if defined( SMILEI_OPENACC_MODE ) || defined( SMILEI_ACCELERATOR_GPU_OMP ) const int npatches = this->size(); @@ -4865,12 +4808,17 @@ void VectorPatch::cleanDataOnDevice( Params ¶ms, SmileiMPI *smpi, smilei::tools::gpu::HostDeviceMemoryManagement::DeviceFree( xi_table, xi_table_size ); } +} #else +void VectorPatch::cleanDataOnDevice( Params &, SmileiMPI *, + RadiationTables *, + MultiphotonBreitWheelerTables *) +{ ERROR( "GPU related code should not be reached in CPU mode!" ); -#endif } +#endif -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) //! Field Synchronization from the GPU (Device) to the CPU //! This function updates the data on the host from the data located on the device @@ -4910,9 +4858,7 @@ void VectorPatch::copyFieldsFromHostToDevice() } } -#endif -#if defined( SMILEI_ACCELERATOR_MODE) //! Sync all fields from device to host void VectorPatch::copyFieldsFromDeviceToHost() @@ -4925,10 +4871,6 @@ VectorPatch::copyFieldsFromDeviceToHost() } } -#endif - - -#if defined( SMILEI_ACCELERATOR_MODE) //! Copy all species particles from Host to devices void VectorPatch::copyParticlesFromHostToDevice() @@ -4940,9 +4882,6 @@ void VectorPatch::copyParticlesFromHostToDevice() } } } -#endif - -#if defined( SMILEI_ACCELERATOR_MODE) //! copy all patch Particles from device to Host void @@ -4955,9 +4894,7 @@ VectorPatch::copyParticlesFromDeviceToHost() for( int ipatch = 0; ipatch < npatches; ipatch++ ) { for( unsigned int ispec = 0; ispec < ( *this )( ipatch )->vecSpecies.size(); ispec++ ) { species( ipatch, ispec )->particles->copyFromDeviceToHost(); -#if defined ( SMILEI_ACCELERATOR_GPU_OMP ) || defined ( SMILEI_ACCELERATOR_MODE ) species( ipatch, ispec )->particles->setHostBinIndex(); -#endif // std::cerr // << "ipatch: " << ipatch // << " ispec: " << ispec @@ -4970,9 +4907,6 @@ VectorPatch::copyParticlesFromDeviceToHost() } } -#endif - -#if defined( SMILEI_ACCELERATOR_MODE) //! 
Sync all fields from device to host void VectorPatch::copySpeciesFieldsFromDeviceToHost() @@ -5052,7 +4986,7 @@ void VectorPatch::dynamicsWithoutTasks( Params ¶ms, if( spec->isProj( time_dual, simWindow ) || diag_flag ) { -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) if (diag_flag) { spec->Species::prepareSpeciesCurrentAndChargeOnDevice( ispec, @@ -5364,7 +5298,7 @@ void VectorPatch::dynamicsWithTasks( Params ¶ms, Species *spec_task = species( ipatch, ispec ); for( unsigned int scell = 0 ; scell < spec_task->Ncells ; scell++ ) { for( unsigned int iPart=spec_task->particles->first_index[scell] ; ( int )iPartparticles->last_index[scell]; iPart++ ) { - if ( spec_task->particles->cell_keys[iPart] != -1 ) { + if ( spec_task->particles->cell_keys[iPart] >= 0 ) { //First reduction of the count sort algorithm. Lost particles are not included. spec_task->count[spec_task->particles->cell_keys[iPart]] ++; } @@ -5380,7 +5314,7 @@ void VectorPatch::dynamicsWithTasks( Params ¶ms, Species *spec_task = species( ipatch, ispec ); for( unsigned int scell = 0 ; scell < spec_task->Ncells ; scell++ ) { for( unsigned int iPart=spec_task->particles->first_index[scell] ; ( int )iPartparticles->last_index[scell]; iPart++ ) { - if ( spec_task->particles->cell_keys[iPart] != -1 ) { + if ( spec_task->particles->cell_keys[iPart] >= 0 ) { //First reduction of the count sort algorithm. Lost particles are not included. spec_task->count[spec_task->particles->cell_keys[iPart]] ++; } @@ -5600,7 +5534,7 @@ void VectorPatch::ponderomotiveUpdatePositionAndCurrentsWithTasks( Params ¶m Species *spec_task = species( ipatch, ispec ); for( unsigned int scell = 0 ; scell < spec_task->Ncells ; scell++ ) { for( unsigned int iPart=spec_task->particles->first_index[scell] ; ( int )iPartparticles->last_index[scell]; iPart++ ) { - if ( spec_task->particles->cell_keys[iPart] != -1 ) { + if ( spec_task->particles->cell_keys[iPart] >= 0 ) { //First reduction of the count sort algorithm. Lost particles are not included. spec_task->count[spec_task->particles->cell_keys[iPart]] ++; } @@ -5618,7 +5552,7 @@ void VectorPatch::ponderomotiveUpdatePositionAndCurrentsWithTasks( Params ¶m Species *spec_task = species( ipatch, ispec ); for( unsigned int scell = 0 ; scell < spec_task->Ncells ; scell++ ) { for( unsigned int iPart=spec_task->particles->first_index[scell] ; ( int )iPartparticles->last_index[scell]; iPart++ ) { - if ( spec_task->particles->cell_keys[iPart] != -1 ) { + if ( spec_task->particles->cell_keys[iPart] >= 0 ) { //First reduction of the count sort algorithm. Lost particles are not included. spec_task->count[spec_task->particles->cell_keys[iPart]] ++; } diff --git a/src/Patch/VectorPatch.h b/src/Patch/VectorPatch.h index 01ec195c2..051d78276 100755 --- a/src/Patch/VectorPatch.h +++ b/src/Patch/VectorPatch.h @@ -138,7 +138,7 @@ public : //! Particle sorting for all patches. This is done at initialization time. void initialParticleSorting( Params ¶ms ); - //! For all patch, move particles (restartRhoJ(s), dynamics and exchangeParticles) + //! For all patch, move particles (restartRhoJ(s), dynamics and initExchParticles) void dynamics( Params ¶ms, SmileiMPI *smpi, SimWindow *simWindow, @@ -157,7 +157,7 @@ public : Timers &timers, int itime ); //! For all patches, exchange particles and sort them. 
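    //! Rough call order (sketch; `vecPatches` is a placeholder for the VectorPatch instance,
    //! the other arguments are those received by dynamics()/finalizeExchParticlesAndSort()):
    //!   SyncVectorPatch::initExchParticles( vecPatches, ispec, params, smpi );            // in dynamics(): fill send buffers, start dim-0 comms
    //!   SyncVectorPatch::finalizeExchParticlesAndSort( vecPatches, ispec, params, smpi ); // here: finish dim 0, treat the other dims, import and sort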
- void finalizeAndSortParticles( Params ¶ms, SmileiMPI *smpi, SimWindow *simWindow, + void finalizeExchParticlesAndSort( Params ¶ms, SmileiMPI *smpi, SimWindow *simWindow, double time_dual, Timers &timers, int itime ); void finalizeSyncAndBCFields( Params ¶ms, SmileiMPI *smpi, SimWindow *simWindow, @@ -510,7 +510,7 @@ public : RadiationTables * radiation_tables, MultiphotonBreitWheelerTables *multiphoton_Breit_Wheeler_tables ); -#if defined( SMILEI_ACCELERATOR_MODE) +#if defined( SMILEI_ACCELERATOR_GPU) //! Field Synchronization from the GPU (Device) to the host (CPU) diff --git a/src/Projector/Projector1D.h b/src/Projector/Projector1D.h index d51327bb7..c08c0e9a8 100755 --- a/src/Projector/Projector1D.h +++ b/src/Projector/Projector1D.h @@ -18,21 +18,19 @@ class Projector1D : public Projector virtual ~Projector1D() {}; virtual void mv_win( unsigned int shift ) { - index_domain_begin+=shift; + i_domain_begin_ += shift; } virtual void setMvWinLimits( unsigned int shift ) { - index_domain_begin = shift; + i_domain_begin_ = shift; } protected: //! Inverse of the spatial step 1/dx double dx_inv_; - int index_domain_begin; + double dx_ov_dt_; + int i_domain_begin_; double *Jx_, *Jy_, *Jz_, *rho_; - -private: - }; #endif diff --git a/src/Projector/Projector1D2Order.cpp b/src/Projector/Projector1D2Order.cpp index cd587dc71..451bca539 100755 --- a/src/Projector/Projector1D2Order.cpp +++ b/src/Projector/Projector1D2Order.cpp @@ -18,14 +18,12 @@ using namespace std; Projector1D2Order::Projector1D2Order( Params ¶ms, Patch *patch ) : Projector1D( params, patch ) { dx_inv_ = 1.0/params.cell_length[0]; - dx_ov_dt = params.cell_length[0] / params.timestep; + dx_ov_dt_ = params.cell_length[0] / params.timestep; - index_domain_begin = patch->getCellStartingGlobalIndex( 0 ); - - dt = params.timestep; - dts2 = params.timestep/2.; - dts4 = params.timestep/4.; + i_domain_begin_ = patch->getCellStartingGlobalIndex( 0 ); + dts2_ = params.timestep/2.; + dts4_ = params.timestep/4.; } @@ -43,7 +41,7 @@ void Projector1D2Order::currents( double *Jx, double *Jy, double *Jz, Particles int ip_m_ipo; double charge_weight = inv_cell_volume * ( double )( particles.charge( ipart ) )*particles.weight( ipart ); double xjn, xj_m_xipo, xj_m_xipo2, xj_m_xip, xj_m_xip2; - double crx_p = charge_weight*dx_ov_dt; // current density for particle moving in the x-direction + double crx_p = charge_weight*dx_ov_dt_; // current density for particle moving in the x-direction double cry_p = charge_weight*particles.momentum( 1, ipart )*invgf; // current density in the y-direction of the macroparticle double crz_p = charge_weight*particles.momentum( 2, ipart )*invgf; // current density allow the y-direction of the macroparticle double S0[5], S1[5], Wl[5], Wt[5], Jx_p[5]; // arrays used for the Esirkepov projection method @@ -76,7 +74,7 @@ void Projector1D2Order::currents( double *Jx, double *Jy, double *Jz, Particles // coefficients 2nd order interpolation on 3 nodes ipo = *iold; // index of the central node - ip_m_ipo = ip-ipo-index_domain_begin; + ip_m_ipo = ip-ipo-i_domain_begin_; S1[ip_m_ipo+1] = 0.5 * ( xj_m_xip2-xj_m_xip+0.25 ); S1[ip_m_ipo+2] = ( 0.75-xj_m_xip2 ); S1[ip_m_ipo+3] = 0.5 * ( xj_m_xip2+xj_m_xip+0.25 ); @@ -115,7 +113,7 @@ void Projector1D2Order::currentsAndDensity( double *Jx, double *Jy, double *Jz, int ip_m_ipo; double charge_weight = inv_cell_volume * ( double )( particles.charge( ipart ) )*particles.weight( ipart ); double xjn, xj_m_xipo, xj_m_xipo2, xj_m_xip, xj_m_xip2; - double crx_p = charge_weight*dx_ov_dt; // 
current density for particle moving in the x-direction + double crx_p = charge_weight*dx_ov_dt_; // current density for particle moving in the x-direction double cry_p = charge_weight*particles.momentum( 1, ipart )*invgf; // current density in the y-direction of the macroparticle double crz_p = charge_weight*particles.momentum( 2, ipart )*invgf; // current density allow the y-direction of the macroparticle double S0[5], S1[5], Wl[5], Wt[5], Jx_p[5]; // arrays used for the Esirkepov projection method @@ -132,7 +130,7 @@ void Projector1D2Order::currentsAndDensity( double *Jx, double *Jy, double *Jz, // Locate particle old position on the primal grid xj_m_xipo = *deltaold; // normalized distance to the nearest grid point - xj_m_xipo2 = xj_m_xipo*xj_m_xipo; // square of the normalized distance to the nearest grid point + xj_m_xipo2 = xj_m_xipo*xj_m_xipo; // square of the normalized distance to the nearest grid point // Locate particle new position on the primal grid xjn = particles.position( 0, ipart ) * dx_inv_; @@ -142,16 +140,16 @@ void Projector1D2Order::currentsAndDensity( double *Jx, double *Jy, double *Jz, // coefficients 2nd order interpolation on 3 nodes - S0[1] = 0.5 * ( xj_m_xipo2-xj_m_xipo+0.25 ); - S0[2] = ( 0.75-xj_m_xipo2 ); - S0[3] = 0.5 * ( xj_m_xipo2+xj_m_xipo+0.25 ); + S0[1] = 0.5 * ( xj_m_xipo2 - xj_m_xipo + 0.25 ); + S0[2] = ( 0.75 - xj_m_xipo2 ); + S0[3] = 0.5 * ( xj_m_xipo2 + xj_m_xipo + 0.25 ); // coefficients 2nd order interpolation on 3 nodes ipo = *iold; - ip_m_ipo = ip-ipo-index_domain_begin; - S1[ip_m_ipo+1] = 0.5 * ( xj_m_xip2-xj_m_xip+0.25 ); - S1[ip_m_ipo+2] = ( 0.75-xj_m_xip2 ); - S1[ip_m_ipo+3] = 0.5 * ( xj_m_xip2+xj_m_xip+0.25 ); + ip_m_ipo = ip-ipo-i_domain_begin_; + S1[ip_m_ipo+1] = 0.5 * ( xj_m_xip2 - xj_m_xip + 0.25 ); + S1[ip_m_ipo+2] = ( 0.75 - xj_m_xip2 ); + S1[ip_m_ipo+3] = 0.5 * ( xj_m_xip2 + xj_m_xip + 0.25 ); // coefficients used in the Esirkepov method for( unsigned int i=0; i<5; i++ ) { @@ -228,7 +226,7 @@ void Projector1D2Order::basic( double *rhoj, Particles &particles, unsigned int S1[2] = ( 0.75-xj_m_xip2 ); S1[3] = 0.5 * ( xj_m_xip2+xj_m_xip+0.25 ); - ip -= index_domain_begin + 2 + bin_shift; + ip -= i_domain_begin_ + 2 + bin_shift; // 2nd order projection for charge density // At the 2nd order, oversize = 2. @@ -270,7 +268,7 @@ void Projector1D2Order::ionizationCurrents( Field *Jx, Field *Jy, Field *Jz, Par xjmxi = xjn - ( double )i + 0.5; // normalized distance to the nearest grid point xjmxi2 = xjmxi*xjmxi; // square of the normalized distance to the nearest grid point - i -= index_domain_begin; + i -= i_domain_begin_; im1 = i-1; ip1 = i+1; @@ -291,7 +289,7 @@ void Projector1D2Order::ionizationCurrents( Field *Jx, Field *Jy, Field *Jz, Par xjmxi = xjn - ( double )i; // normalized distance to the nearest grid point xjmxi2 = xjmxi*xjmxi; // square of the normalized distance to the nearest grid point - i -= index_domain_begin; + i -= i_domain_begin_; im1 = i-1; ip1 = i+1; @@ -377,9 +375,9 @@ void Projector1D2Order::susceptibility( ElectroMagn *EMfields, Particles &partic for( int ipart=istart ; ipartJx_/Jy_/Jz_) inline void currents( double *Jx, double *Jy, double *Jz, Particles &particles, unsigned int ipart, double invgf, int *iold, double *deltaold, int bin_shift = 0 ); //! 
Project global current densities (EMfields->Jx_/Jy_/Jz_/rho), diagFields timestep - inline void currentsAndDensity( double *Jx, double *Jy, double *Jz, double *rho, Particles &particles, unsigned int ipart, double invgf, int *iold, double *deltaold, int bin_shift = 0 ); + inline void __attribute__((always_inline)) currentsAndDensity( double *Jx, double *Jy, double *Jz, double *rho, Particles &particles, unsigned int ipart, double invgf, int *iold, double *deltaold, int bin_shift = 0 ); //! Project global current charge (EMfields->rho_ , J), for initialization and diags void basic( double *rhoj, Particles &particles, unsigned int ipart, unsigned int type, int bin_shift = 0 ) override final; @@ -36,8 +36,7 @@ class Projector1D2Order : public Projector1D void susceptibilityOnBuffer( ElectroMagn *EMfields, double *b_Chi, int bin_shift, int bdim0, Particles &particles, double species_mass, SmileiMPI *smpi, int istart, int iend, int ithread, int icell = 0, int ipart_ref = 0 ) override final; private: - double dx_ov_dt; - double dt, dts2, dts4; + double dts2_, dts4_; }; #endif diff --git a/src/Projector/Projector1D2OrderGPU.cpp b/src/Projector/Projector1D2OrderGPU.cpp new file mode 100755 index 000000000..19493ef8d --- /dev/null +++ b/src/Projector/Projector1D2OrderGPU.cpp @@ -0,0 +1,294 @@ + + +#if defined( SMILEI_ACCELERATOR_GPU ) +#include "Projector1D2OrderGPUKernelCUDAHIP.h" +#include +#include "Tools.h" +#endif + +#include "Projector1D2OrderGPU.h" + +#include "ElectroMagn.h" +#include "Patch.h" +#include "gpu.h" + + +Projector1D2OrderGPU::Projector1D2OrderGPU( Params ¶meters, Patch *a_patch ) + : Projector1D{ parameters, a_patch } +{ + Projector1D::dx_inv_ = 1.0 / parameters.cell_length[0]; + Projector1D::dx_ov_dt_ = parameters.cell_length[0] / parameters.timestep; + Projector1D::i_domain_begin_ = a_patch->getCellStartingGlobalIndex( 0 ); + + not_spectral_ = !parameters.is_pxr; + dts2_ = parameters.timestep / 2.0; + dts4_ = dts2_ / 2.0; +#if defined( SMILEI_ACCELERATOR_GPU ) + x_dimension_bin_count_ = parameters.getGPUBinCount( 1 ); +#else + ERROR( "Only usable in GPU mode! " ); +#endif +} + +Projector1D2OrderGPU::~Projector1D2OrderGPU() +{ +} +#if defined( SMILEI_ACCELERATOR_GPU ) + + +//! Project global current densities (EMfields->Jx_/Jy_/Jz_) +extern "C" void +currentDepositionKernel1DOnDevice( double *__restrict__ host_Jx, + double *__restrict__ host_Jy, + double *__restrict__ host_Jz, + int Jx_size, + int Jy_size, + int Jz_size, + const double *__restrict__ device_particle_position_x, + const double *__restrict__ device_particle_momentum_y, + const double *__restrict__ device_particle_momentum_z, + const short *__restrict__ device_particle_charge, + const double *__restrict__ device_particle_weight, + const int *__restrict__ host_bin_index, + unsigned int x_dimension_bin_count_, + const double *__restrict__ host_invgf_, + const int *__restrict__ host_iold_, + const double *__restrict__ host_deltaold_, + double inv_cell_volume, + double dx_inv_, + double dx_ov_dt_, + int i_domain_begin_, + int not_spectral_ ) +{ + cudahip1d::currentDepositionKernel1D( host_Jx, host_Jy, host_Jz, + Jx_size, Jy_size, Jz_size, + device_particle_position_x, device_particle_momentum_y, + device_particle_momentum_z, + device_particle_charge, + device_particle_weight, + host_bin_index, + x_dimension_bin_count_, + host_invgf_, + host_iold_, host_deltaold_, + inv_cell_volume, + dx_inv_, + dx_ov_dt_, + i_domain_begin_, + not_spectral_ ); +} + + +//! 
Project global current and charge densities (EMfields->Jx_/Jy_/Jz_/rho_) +//! +extern "C" void +currentAndDensityDepositionKernel1DOnDevice( double *__restrict__ host_Jx, + double *__restrict__ host_Jy, + double *__restrict__ host_Jz, + double *__restrict__ host_rho, + int Jx_size, + int Jy_size, + int Jz_size, + int rho_size, + const double *__restrict__ device_particle_position_x, + const double *__restrict__ device_particle_momentum_y, + const double *__restrict__ device_particle_momentum_z, + const short *__restrict__ device_particle_charge, + const double *__restrict__ device_particle_weight, + const int *__restrict__ host_bin_index, + unsigned int x_dimension_bin_count_, + const double *__restrict__ host_invgf_, + const int *__restrict__ host_iold_, + const double *__restrict__ host_deltaold_, + double inv_cell_volume, + double dx_inv_, + double dx_ov_dt_, + int i_domain_begin_, + int not_spectral_ ) +{ + cudahip1d::currentAndDensityDepositionKernel1D( host_Jx, host_Jy, host_Jz, host_rho, + Jx_size, Jy_size, Jz_size, rho_size, + device_particle_position_x, device_particle_momentum_y, + device_particle_momentum_z, + device_particle_charge, + device_particle_weight, + host_bin_index, + x_dimension_bin_count_, + host_invgf_, + host_iold_, host_deltaold_, + inv_cell_volume, + dx_inv_, + dx_ov_dt_, + i_domain_begin_, + not_spectral_ ); +} +#endif + +// --------------------------------------------------------------------------------------------------------------------- +//! Project charge : frozen & diagFields timstep +// --------------------------------------------------------------------------------------------------------------------- +void Projector1D2OrderGPU::basic( double *rhoj, Particles &particles, unsigned int ipart, unsigned int type, int bin_shift ) +{ + + //Warning : this function is used for frozen species or initialization only and doesn't use the standard scheme. + //rho type = 0 + //Jx type = 1 + //Jy type = 2 + //Jz type = 3 + + // The variable bin received is number of bin * cluster width. + // Declare local variables + int ip; + double xjn, xj_m_xip, xj_m_xip2; + double S1[5]; // arrays used for the Esirkepov projection method + + double charge_weight = inv_cell_volume * ( double )( particles.charge( ipart ) )*particles.weight( ipart ); + if( type > 0 ) { + charge_weight *= 1./sqrt( 1.0 + particles.momentum( 0, ipart )*particles.momentum( 0, ipart ) + + particles.momentum( 1, ipart )*particles.momentum( 1, ipart ) + + particles.momentum( 2, ipart )*particles.momentum( 2, ipart ) ); + + if( type == 1 ) { + charge_weight *= particles.momentum( 0, ipart ); + } else if( type == 2 ) { + charge_weight *= particles.momentum( 1, ipart ); + } else { + charge_weight *= particles.momentum( 2, ipart ); + } + } + + // Initialize variables + for( unsigned int i=0; i<5; i++ ) { + S1[i]=0.; + }//i + + // Locate particle new position on the primal grid + xjn = particles.position( 0, ipart ) * dx_inv_; + ip = round( xjn + 0.5 * ( type==1 ) ); // index of the central node + xj_m_xip = xjn - ( double )ip; // normalized distance to the nearest grid point + xj_m_xip2 = xj_m_xip * xj_m_xip; // square of the normalized distance to the nearest grid point + + // coefficients 2nd order interpolation on 3 nodes + //ip_m_ipo = ip-ipo; + S1[1] = 0.5 * ( xj_m_xip2 - xj_m_xip + 0.25 ); + S1[2] = ( 0.75 - xj_m_xip2 ); + S1[3] = 0.5 * ( xj_m_xip2 + xj_m_xip + 0.25 ); + + ip -= i_domain_begin_ + 2 + bin_shift; + + // 2nd order projection for charge density + // At the 2nd order, oversize = 2. 
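    // Note: only S1[1..3] can be non-zero; they are the usual quadratic-spline (2nd-order)
    // shape factors and sum to 1 for any xj_m_xip in [-0.5, 0.5] (e.g. 0.125, 0.75, 0.125
    // for a particle sitting exactly on a node). Shifting ip by i_domain_begin_ + 2 + bin_shift
    // maps the global primal index to a local array index and re-centres the 5-point stencil
    // by its half-width of 2, consistent with oversize = 2 at this order.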
+ for( unsigned int i=0; i<5; i++ ) { + rhoj[i + ip ] += charge_weight * S1[i]; + } + +} + + +void Projector1D2OrderGPU::currentsAndDensityWrapper( ElectroMagn *EMfields, + Particles &particles, + SmileiMPI *smpi, + int, + int, + int ithread, + bool diag_flag, + bool is_spectral, + int ispec, + int icell, + int ipart_ref ) +{ + std::vector &iold = smpi->dynamics_iold[ithread]; + std::vector &delta = smpi->dynamics_deltaold[ithread]; + std::vector &invgf = smpi->dynamics_invgf[ithread]; + + if( diag_flag ) { + + double *const __restrict__ b_Jx = EMfields->Jx_s[ispec] ? EMfields->Jx_s[ispec]->data() : EMfields->Jx_->data(); + unsigned int Jx_size = EMfields->Jx_s[ispec] ? EMfields->Jx_s[ispec]->size() : EMfields->Jx_->size(); + + double *const __restrict__ b_Jy = EMfields->Jy_s[ispec] ? EMfields->Jy_s[ispec]->data() : EMfields->Jy_->data(); + unsigned int Jy_size = EMfields->Jy_s[ispec] ? EMfields->Jy_s[ispec]->size() : EMfields->Jy_->size(); + + double *const __restrict__ b_Jz = EMfields->Jz_s[ispec] ? EMfields->Jz_s[ispec]->data() : EMfields->Jz_->data(); + unsigned int Jz_size = EMfields->Jz_s[ispec] ? EMfields->Jz_s[ispec]->size() : EMfields->Jz_->size(); + + double *const __restrict__ b_rho = EMfields->rho_s[ispec] ? EMfields->rho_s[ispec]->data() : EMfields->rho_->data(); + unsigned int rho_size = EMfields->rho_s[ispec] ? EMfields->rho_s[ispec]->size() : EMfields->rho_->size(); + + // Does not compute Rho ! + +#if defined( SMILEI_ACCELERATOR_GPU ) + + currentAndDensityDepositionKernel1DOnDevice( b_Jx,b_Jy,b_Jz,b_rho, + Jx_size, Jy_size, Jz_size, rho_size, + particles.getPtrPosition( 0 ), + particles.getPtrMomentum( 1 ), + particles.getPtrMomentum( 2 ), + particles.getPtrCharge(), + particles.getPtrWeight(), + particles.last_index.data(), + x_dimension_bin_count_, + invgf.data(), + iold.data(), + delta.data(), + inv_cell_volume, + dx_inv_, + dx_ov_dt_, + i_domain_begin_, + not_spectral_ ); + +#else + SMILEI_ASSERT( false ); +#endif + } else { + if( is_spectral ) { + ERROR( "Not implemented on GPU" ); + } + else{ + +#if defined( SMILEI_ACCELERATOR_GPU ) + currentDepositionKernel1DOnDevice(Jx_, Jy_, Jz_, + EMfields->Jx_->size(), EMfields->Jy_->size(), EMfields->Jz_->size(), + particles.getPtrPosition( 0 ), + particles.getPtrMomentum( 1 ), + particles.getPtrMomentum( 2 ), + particles.getPtrCharge(), + particles.getPtrWeight(), + particles.last_index.data(), + x_dimension_bin_count_, + invgf.data(), + iold.data(), + delta.data(), + inv_cell_volume, + dx_inv_, + dx_ov_dt_, + i_domain_begin_, + not_spectral_ ); +#else + SMILEI_ASSERT( false ); +#endif + } + } +} + +void Projector1D2OrderGPU::ionizationCurrents( Field *Jx, + Field *Jy, + Field *Jz, + Particles &particles, + int ipart, + LocalFields Jion ) +{ + ERROR( "Projector1D2OrderGPU::ionizationCurrents(): Not implemented !" ); +} + +void Projector1D2OrderGPU::susceptibility( ElectroMagn *EMfields, + Particles &particles, + double species_mass, + SmileiMPI *smpi, + int istart, + int iend, + int ithread, + int icell, + int ipart_ref ) +{ + ERROR( "Projector1D2OrderGPU::susceptibility(): Not implemented !" 
); +} diff --git a/src/Projector/Projector1D2OrderGPU.h b/src/Projector/Projector1D2OrderGPU.h new file mode 100755 index 000000000..f35e8e4ee --- /dev/null +++ b/src/Projector/Projector1D2OrderGPU.h @@ -0,0 +1,127 @@ +#ifndef SMILEI_PROJECTOR_PROJECTOR1D2ORDERGPU_H +#define SMILEI_PROJECTOR_PROJECTOR1D2ORDERGPU_H + +#include "Projector1D.h" + + +class Projector1D2OrderGPU : public Projector1D +{ +public: + Projector1D2OrderGPU( Params ¶meters, Patch *a_patch ); + ~Projector1D2OrderGPU(); + + /// For initialization and diags, doesn't use the standard scheme + void basic( double *rhoj, + Particles &particles, + unsigned int ipart, + unsigned int type, + int bin_shift = 0 ) override; + /// Projection wrapper + void currentsAndDensityWrapper( ElectroMagn *EMfields, + Particles &particles, + SmileiMPI *smpi, + int istart, + int iend, + int ithread, + bool diag_flag, + bool is_spectral, + int ispec, + int icell = 0, + int ipart_ref = 0 ) override; + + void susceptibility( ElectroMagn *EMfields, + Particles &particles, + double species_mass, + SmileiMPI *smpi, + int istart, + int iend, + int ithread, + int icell = 0, + int ipart_ref = 0 ) override; + + void ionizationCurrents( Field *Jx, + Field *Jy, + Field *Jz, + Particles &particles, + int ipart, + LocalFields Jion ) override; + + + //!Wrapper for task-based implementation of Smilei + //! compiler complains otherwise even if it is completely useless + void currentsAndDensityWrapperOnBuffers( double *b_Jx, + double *b_Jy, + double *b_Jz, + double *b_rho, + int bin_width, + Particles &particles, + SmileiMPI *smpi, + int istart, + int iend, + int ithread, + bool diag_flag, + bool is_spectral, + int ispec, + int icell = 0, + int ipart_ref = 0 ) override {}; +/*#if defined( SMILEI_ACCELERATOR_GPU ) + +extern "C" void +currentDepositionKernel1DOnDevice( double *__restrict__ Jx, + double *__restrict__ Jy, + double *__restrict__ Jz, + int Jx_size, + int Jy_size, + int Jz_size, + const double *__restrict__ particle_position_x, + const double *__restrict__ particle_momentum_y, + const double *__restrict__ particle_momentum_z, + const short *__restrict__ particle_charge, + const double *__restrict__ particle_weight, + const int *__restrict__ host_bin_index, + unsigned int x_dimension_bin_count, + const double *__restrict__ invgf_, + const int *__restrict__ iold_, + const double *__restrict__ deltaold_, + double inv_cell_volume, + double dx_inv, + double dx_ov_dt, + int i_domain_begin, + int not_spectral_ ); + +extern "C" void +currentAndDensityDepositionKernel1DOnDevice( double *__restrict__ Jx, + double *__restrict__ Jy, + double *__restrict__ Jz, + double *__restrict__ rho, + int Jx_size, + int Jy_size, + int Jz_size, + int rho_size, + const double *__restrict__ particle_position_x, + const double *__restrict__ particle_momentum_y, + const double *__restrict__ particle_momentum_z, + const short *__restrict__ particle_charge, + const double *__restrict__ particle_weight, + const int *__restrict__ host_bin_index, + unsigned int x_dimension_bin_count, + const double *__restrict__ invgf_, + const int *__restrict__ iold_, + const double *__restrict__ deltaold_, + double inv_cell_volume, + double dx_inv, + double dx_ov_dt, + int i_domain_begin, + int not_spectral_ ); + +#endif*/ + + +protected: + double dts2_; + double dts4_; + int not_spectral_; + unsigned int x_dimension_bin_count_; +}; + +#endif \ No newline at end of file diff --git a/src/Projector/Projector1D2OrderGPUKernelCUDAHIP.cu b/src/Projector/Projector1D2OrderGPUKernelCUDAHIP.cu new file mode 
100755 index 000000000..0a77a63db --- /dev/null +++ b/src/Projector/Projector1D2OrderGPUKernelCUDAHIP.cu @@ -0,0 +1,1070 @@ + + +#if defined( __HIP__ ) + #include +#elif defined( __NVCC__ ) + #include + #include +#endif + +#include "Params.h" +#include "gpu.h" +#include + +#if defined( __HIP__ ) + // HIP compiler support enabled (for .cu files) +#else + #define PRIVATE_SMILEI_USE_OPENMP_PROJECTION_IMPLEMENTATION 1 +#endif + +#if defined( PRIVATE_SMILEI_USE_OPENMP_PROJECTION_IMPLEMENTATION ) + #include + #include "Tools.h" +#else + #include + + #include "Params.h" + #include "gpu.h" +#endif + +// #if defined( PRIVATE_SMILEI_USE_OPENMP_PROJECTION_IMPLEMENTATION ) + +//namespace naive { +// +// void //static inline void +// currentDepositionKernel2D( double *__restrict__ Jx, +// double *__restrict__ Jy, +// double *__restrict__ Jz, +// int Jx_size, +// int Jy_size, +// int Jz_size, +// const double *__restrict__ device_particle_position_x, +// const double *__restrict__ device_particle_momentum_y, +// const double *__restrict__ device_particle_momentum_z, +// const short *__restrict__ device_particle_charge, +// const double *__restrict__ device_particle_weight, +// const int *__restrict__ host_bin_index, +// unsigned int x_dimension_bin_count, +// const double *__restrict__ invgf_, +// const int *__restrict__ iold_, +// const double *__restrict__ deltaold_, +// double inv_cell_volume, +// double dx_inv, +// double dx_ov_dt, +// int i_domain_begin, +// int not_spectral_ ) +// { +// // The OMP implementation is NOT bin aware. As per the precondition on +// // host_bin_index, index zero always contains the number of particles. +// // See nvidiaParticles::prepareBinIndex / setHostBinIndex. +// const unsigned int bin_count = 1; +// const int particle_count = host_bin_index[bin_count - 1]; +// +// #if defined( SMILEI_ACCELERATOR_GPU_OMP ) +// #pragma omp target is_device_ptr /* map */ ( /* to: */ \ +// device_particle_position_x /* [0:particle_count] */, \ +// device_particle_momentum_y /* [0:particle_count] */, \ +// device_particle_momentum_z /* [0:particle_count] */, \ +// device_particle_charge /* [0:particle_count] */, \ +// device_particle_weight /* [0:particle_count] */ ) +// #pragma omp teams thread_limit( 64 ) distribute parallel for +// #elif defined( SMILEI_ACCELERATOR_GPU_OACC ) +// #pragma acc parallel \ +// deviceptr( device_particle_position_x, \ +// device_particle_momentum_y, \ +// device_particle_momentum_z, \ +// device_particle_charge, \ +// device_particle_weight ) \ +// present( iold [0:3 * particle_count], \ +// deltaold [0:3 * particle_count] ) +// #pragma acc loop gang worker vector +// #endif +// for( int particle_index = 0; particle_index < particle_count; ++particle_index ) { +// const double invgf = invgf_[particle_index]; +// const int *const __restrict__ iold = &iold_[particle_index]; +// const double *const __restrict__ deltaold = &deltaold_[particle_index]; +// +// double Sx0[5]; +// double Sx1[5]; +// +// // Variable declaration & initialization +// // Esirkepov's paper: https://arxiv.org/pdf/physics/9901047.pdf +// +// // Locate the particle on the primal grid at former time-step & calculate coeff. S0 +// { +// const double delta = deltaold[0 * particle_count]; +// const double delta2 = delta * delta; +// Sx0[0] = 0.0; +// Sx0[1] = 0.5 * ( delta2 - delta + 0.25 ); +// Sx0[2] = 0.75 - delta2; +// Sx0[3] = 0.5 * ( delta2 + delta + 0.25 ); +// Sx0[4] = 0.0; +// } +// +// // Locate the particle on the primal grid at current time-step & calculate coeff. 
S1 +// { +// const double xpn = device_particle_position_x[particle_index] * dx_inv; +// const int ip = std::round( xpn ); +// const int ipo = iold[0 * particle_count]; +// const int ip_m_ipo = ip - ipo - i_domain_begin; +// const double delta = xpn - static_cast( ip ); +// const double delta2 = delta * delta; +// +// Sx1[0] = 0.0; +// Sx1[1] = 0.0; +// // Sx1[2] = 0.0; // Always set below +// Sx1[3] = 0.0; +// Sx1[4] = 0.0; +// +// Sx1[ip_m_ipo + 1] = 0.5 * ( delta2 - delta + 0.25 ); +// Sx1[ip_m_ipo + 2] = 0.75 - delta2; +// Sx1[ip_m_ipo + 3] = 0.5 * ( delta2 + delta + 0.25 ); +// } +// +// // (x,y,z) components of the current density for the macro-particle +// const double charge_weight = inv_cell_volume * static_cast( device_particle_charge[particle_index] ) * device_particle_weight[particle_index]; +// const double crx_p = charge_weight * dx_ov_dt; +// const double cry_p = charge_weight * dy_ov_dt; +// const double crz_p = charge_weight * ( 1.0 / 3.0 ) * device_particle_momentum_z[particle_index] * invgf; +// +// // This is the particle position as grid index +// // This minus 2 come from the order 2 scheme, based on a 5 points stencil from -2 to +2. +// const int ipo = iold[0 * particle_count] - 2; +// +// for( unsigned int i = 0; i < 1; ++i ) { +// const int iloc = ( i + ipo ) ; +// /* Jx[iloc] += tmpJx[0]; */ +// +// SMILEI_ACCELERATOR_ATOMIC +// Jz[iloc] += crz_p * ( Sy1[0] * ( /* 0.5 * Sx0[i] + */ Sx1[i] ) ); +// double tmp = 0.0; +// for( unsigned int j = 1; j < 5; j++ ) { +// tmp -= cry_p * ( Sy1[j - 1] - Sy0[j - 1] ) * ( Sx0[i] + 0.5 * ( Sx1[i] - Sx0[i] ) ); +// +// SMILEI_ACCELERATOR_ATOMIC +// Jy[iloc + j + not_spectral_ * ( /* i + */ ipo )] += tmp; +// +// SMILEI_ACCELERATOR_ATOMIC +// Jz[iloc + j] += crz_p * ( Sy0[j] * ( 0.5 * Sx1[i] /* + Sx0[i] */ ) + +// Sy1[j] * ( /* 0.5 * Sx0[i] + */ Sx1[i] ) ); +// } +// } +// +// double tmpJx[5]{}; +// +// for( unsigned int i = 1; i < 5; ++i ) { +// const int iloc = ( i + ipo ) ; +// tmpJx[0] -= crx_p * ( Sx1[i - 1] - Sx0[i - 1] ) * ( 0.5 * ( Sy1[0] - Sy0[0] ) ); +// SMILEI_ACCELERATOR_ATOMIC +// Jx[iloc] += tmpJx[0]; +// SMILEI_ACCELERATOR_ATOMIC +// Jz[iloc] += crz_p * ( Sy1[0] * ( 0.5 * Sx0[i] + Sx1[i] ) ); +// double tmp = 0.0; +// for( unsigned int j = 1; j < 5; ++j ) { +// tmpJx[j] -= crx_p * ( Sx1[i - 1] - Sx0[i - 1] ) * ( Sy0[j] + 0.5 * ( Sy1[j] - Sy0[j] ) ); +// SMILEI_ACCELERATOR_ATOMIC +// Jx[iloc + j] += tmpJx[j]; +// tmp -= cry_p * ( Sy1[j - 1] - Sy0[j - 1] ) * ( Sx0[i] + 0.5 * ( Sx1[i] - Sx0[i] ) ); +// SMILEI_ACCELERATOR_ATOMIC +// Jy[iloc + j + not_spectral_ * ( i + ipo )] += tmp; +// +// SMILEI_ACCELERATOR_ATOMIC +// Jz[iloc + j] += crz_p * ( Sy0[j] * ( 0.5 * Sx1[i] + Sx0[i] ) + +// Sy1[j] * ( 0.5 * Sx0[i] + Sx1[i] ) ); +// } +// } +// } +// } // end currentDepositionKernel +// +// //static inline +// void +// currentAndDensityDepositionKernel( double *__restrict__ Jx, +// double *__restrict__ Jy, +// double *__restrict__ Jz, +// double *__restrict__ rho, +// int Jx_size, +// int Jy_size, +// int Jz_size, +// int rho_size, +// const double *__restrict__ device_particle_position_x, +// const double *__restrict__ device_particle_momentum_y, +// const double *__restrict__ device_particle_momentum_z, +// const short *__restrict__ device_particle_charge, +// const double *__restrict__ device_particle_weight, +// const int *__restrict__ host_bin_index, +// unsigned int, +// unsigned int, +// const double *__restrict__ invgf_, +// const int *__restrict__ iold_, +// const double *__restrict__ deltaold_, +// double 
inv_cell_volume, +// double dx_inv, +// double dx_ov_dt, +// int i_domain_begin, +// int not_spectral_ ) +// { +// // The OMP implementation is NOT bin aware. As per the precondition on +// // host_bin_index, index zero always contains the number of particles. +// // See nvidiaParticles::prepareBinIndex / setHostBinIndex. +// const unsigned int bin_count = 1; +// const int particle_count = host_bin_index[bin_count - 1]; +// +// #if defined( SMILEI_ACCELERATOR_GPU_OMP ) +// #pragma omp target is_device_ptr /* map */ ( /* to: */ \ +// device_particle_position_x /* [0:particle_count] */, \ +// device_particle_momentum_y /* [0:particle_count] */, \ +// device_particle_momentum_z /* [0:particle_count] */, \ +// device_particle_charge /* [0:particle_count] */, \ +// device_particle_weight /* [0:particle_count] */ ) +// #pragma omp teams thread_limit( 64 ) distribute parallel for +// #elif defined( SMILEI_ACCELERATOR_GPU_OACC ) +// #pragma acc parallel \ +// deviceptr( device_particle_position_x, \ +// device_particle_momentum_y, \ +// device_particle_momentum_z, \ +// device_particle_charge, \ +// device_particle_weight ) \ +// present( iold [0:3 * particle_count], \ +// deltaold [0:3 * particle_count] ) +// #pragma acc loop gang worker vector +// #endif +// for( int particle_index = 0; particle_index < particle_count; ++particle_index ) { +// const double invgf = invgf_[particle_index]; +// const int *const __restrict__ iold = &iold_[particle_index]; +// const double *const __restrict__ deltaold = &deltaold_[particle_index]; +// +// double Sx0[5]; +// double Sx1[5]; +// double Sy0[5]; +// double Sy1[5]; +// +// // Variable declaration & initialization +// // Esirkepov's paper: https://arxiv.org/pdf/physics/9901047.pdf +// +// // Locate the particle on the primal grid at former time-step & calculate coeff. S0 +// { +// const double delta = deltaold[0 * particle_count]; +// const double delta2 = delta * delta; +// Sx0[0] = 0.0; +// Sx0[1] = 0.5 * ( delta2 - delta + 0.25 ); +// Sx0[2] = 0.75 - delta2; +// Sx0[3] = 0.5 * ( delta2 + delta + 0.25 ); +// Sx0[4] = 0.0; +// } +// // Locate the particle on the primal grid at current time-step & calculate coeff. S1 +// { +// const double xpn = device_particle_position_x[particle_index] * dx_inv; +// const int ip = std::round( xpn ); +// const int ipo = iold[0 * particle_count]; +// const int ip_m_ipo = ip - ipo - i_domain_begin; +// const double delta = xpn - static_cast( ip ); +// const double delta2 = delta * delta; +// +// Sx1[0] = 0.0; +// Sx1[1] = 0.0; +// // Sx1[2] = 0.0; // Always set below +// Sx1[3] = 0.0; +// Sx1[4] = 0.0; +// +// Sx1[ip_m_ipo + 1] = 0.5 * ( delta2 - delta + 0.25 ); +// Sx1[ip_m_ipo + 2] = 0.75 - delta2; +// Sx1[ip_m_ipo + 3] = 0.5 * ( delta2 + delta + 0.25 ); +// } +// +// // (x,y,z) components of the current density for the macro-particle +// const double charge_weight = inv_cell_volume * static_cast( device_particle_charge[particle_index] ) * device_particle_weight[particle_index]; +// const double crx_p = charge_weight * dx_ov_dt; +// const double cry_p = charge_weight * dy_ov_dt; +// const double crz_p = charge_weight * ( 1.0 / 3.0 ) * device_particle_momentum_z[particle_index] * invgf; +// +// // This is the particle position as grid index +// // This minus 2 come from the order 2 scheme, based on a 5 points stencil from -2 to +2. 
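The "5 points stencil from -2 to +2" mentioned just above is shared by this commented-out reference code and the live 1D kernels further down: the old-time weights S0 occupy the three middle slots of a 5-slot array, while the new-time weights S1 are written at an offset ip - ipo that can only be -1, 0 or +1 because a particle moves by less than one cell per timestep. A small standalone sketch of that indexing (the numbers are hypothetical and the i_domain_begin shift of the real code is dropped for brevity):

// stencil_shift_sketch.cpp -- illustrative only.
#include <cmath>
#include <cstdio>

// Fill the old-time (S0) and new-time (S1) quadratic weights on a common
// 5-point stencil centred on the old cell index ipo.
void fillShapes( double delta_old, double xpn, int ipo, double S0[5], double S1[5] )
{
    for( int i = 0; i < 5; ++i ) { S0[i] = 0.; S1[i] = 0.; }

    double d = delta_old, d2 = d * d;
    S0[1] = 0.5 * ( d2 - d + 0.25 );
    S0[2] = 0.75 - d2;
    S0[3] = 0.5 * ( d2 + d + 0.25 );

    const int ip       = static_cast<int>( std::lround( xpn ) );
    const int ip_m_ipo = ip - ipo;           // -1, 0 or +1 under the CFL condition
    d = xpn - ip; d2 = d * d;
    S1[ip_m_ipo + 1] = 0.5 * ( d2 - d + 0.25 );
    S1[ip_m_ipo + 2] = 0.75 - d2;
    S1[ip_m_ipo + 3] = 0.5 * ( d2 + d + 0.25 );
}

int main()
{
    double S0[5], S1[5];
    // Particle that moved from cell 10 (delta = 0.4) to xpn = 10.6, i.e. nearest node 11.
    fillShapes( 0.4, 10.6, 10, S0, S1 );
    for( int i = 0; i < 5; ++i ) {
        std::printf( "slot %d : S0=%.4f  S1=%.4f\n", i, S0[i], S1[i] );
    }
}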
+// const int ipo = iold[0 * particle_count] - 2; +// const int jpo = iold[1 * particle_count] - 2; +// +// // case i =0 +// for( unsigned int i = 0; i < 1; ++i ) { +// const int iloc = ( i + ipo ) ; +// /* Jx[iloc] += tmpJx[0]; */ +// +// SMILEI_ACCELERATOR_ATOMIC +// Jz[iloc] += crz_p * ( Sy1[0] * ( /* 0.5 * Sx0[i] + */ Sx1[i] ) ); +// +// SMILEI_ACCELERATOR_ATOMIC +// rho[iloc] += charge_weight * Sx1[0] * Sy1[0]; +// double tmp = 0.0; +// for( unsigned int j = 1; j < 5; j++ ) { +// tmp -= cry_p * ( Sy1[j - 1] - Sy0[j - 1] ) * ( Sx0[i] + 0.5 * ( Sx1[i] - Sx0[i] ) ); +// +// SMILEI_ACCELERATOR_ATOMIC +// Jy[iloc + j + not_spectral_ * ( /* i + */ ipo )] += tmp; +// +// SMILEI_ACCELERATOR_ATOMIC +// Jz[iloc + j] += crz_p * ( Sy0[j] * ( 0.5 * Sx1[i] /* + Sx0[i] */ ) + +// Sy1[j] * ( /* 0.5 * Sx0[i] + */ Sx1[i] ) ); +// SMILEI_ACCELERATOR_ATOMIC +// rho[iloc + j] += charge_weight * Sx1[0] * Sy1[j]; +// } +// } +// +// double tmpJx[5]{}; +// +// // case i> 0 +// for( unsigned int i = 1; i < 5; ++i ) { +// const int iloc = i + ipo ; +// tmpJx[0] -= crx_p * ( Sx1[i - 1] - Sx0[i - 1] ); +// +// SMILEI_ACCELERATOR_ATOMIC +// Jx[iloc] += tmpJx[0]; +// +// SMILEI_ACCELERATOR_ATOMIC +// Jz[iloc] += crz_p * ( Sy1[0] * ( 0.5 * Sx0[i] + Sx1[i] ) ); +// +// SMILEI_ACCELERATOR_ATOMIC +// rho[iloc] += charge_weight * Sx1[i] * Sy1[0]; +// +// double tmp = 0.0; +// for( unsigned int j = 1; j < 5; ++j ) { +// tmpJx[j] -= crx_p * ( Sx1[i - 1] - Sx0[i - 1] ) * ( Sy0[j] + 0.5 * ( Sy1[j] - Sy0[j] ) ); +// +// SMILEI_ACCELERATOR_ATOMIC +// Jx[iloc + j] += tmpJx[j]; +// tmp -= cry_p * ( Sy1[j - 1] - Sy0[j - 1] ) * ( Sx0[i] + 0.5 * ( Sx1[i] - Sx0[i] ) ); +// +// SMILEI_ACCELERATOR_ATOMIC +// Jy[iloc + j + not_spectral_ * ( i + ipo )] += tmp; +// +// SMILEI_ACCELERATOR_ATOMIC +// Jz[iloc + j] += crz_p * ( Sy0[j] * ( 0.5 * Sx1[i] + Sx0[i] ) + +// Sy1[j] * ( 0.5 * Sx0[i] + Sx1[i] ) ); +// +// SMILEI_ACCELERATOR_ATOMIC +// rho[iloc + j] += charge_weight * Sx1[i] * Sy1[j]; +// } +// } +// } +// } // end currentDepositionKernel +// +// +//} // namespace naive +// +// #else + +namespace cudahip1d { + namespace detail { +#if defined( __HIP__ ) + static inline void + checkErrors( ::hipError_t an_error_code, + const char *file_name, + int line ) + { + if( an_error_code != ::hipError_t::hipSuccess ) { + std::cout << "HIP error at " << file_name << ":" << line + << " -> " << ::hipGetErrorString( an_error_code ) << std::endl; + std::exit( EXIT_FAILURE ); + } + } +// For NVIDIA compiler +#elif defined( __NVCC__ ) + static inline void + checkErrors( ::cudaError_t an_error_code, + const char *file_name, + int line ) + { + if( an_error_code != ::cudaError_t::cudaSuccess ) { + std::cout << "CUDA error at " << file_name << ":" << line << " -> " << ::cudaGetErrorString( an_error_code ) << std::endl; + std::exit( EXIT_FAILURE ); + } + } +#endif + + } // namespace detail + + #define checkHIPErrors( an_expression ) \ + do { \ + detail::checkErrors( an_expression, __FILE__, __LINE__ ); \ + } while( 0 ) + + namespace kernel { + namespace atomic { + namespace LDS { + __device__ void + AddNoReturn( float *a_pointer, float a_value ) + { + #if defined( __gfx90a__ ) + ::unsafeAtomicAdd( a_pointer, a_value ); + #else + ::atomicAdd( a_pointer, a_value ); + #endif + } + + __device__ void + AddNoReturn( double *a_pointer, double a_value ) + { + #if defined( __gfx90a__ ) + ::unsafeAtomicAdd( a_pointer, a_value ); + #else + ::atomicAdd( a_pointer, a_value ); + #endif + } + } // namespace LDS + + namespace GDS { + __device__ void + AddNoReturn( double 
*a_pointer, double a_value ) + { + #if defined( __gfx90a__ ) + ::unsafeAtomicAdd( a_pointer, a_value ); + #else + ::atomicAdd( a_pointer, a_value ); + #endif + } + } // namespace GDS + } // namespace atomic + + + template + __device__ void inline __attribute__((always_inline)) init_S0(const ComputeFloat delta, ComputeFloat *__restrict__ S0) + { + const ComputeFloat delta2 = delta * delta; + S0[0] = static_cast( 0.5 ) * ( delta2 - delta + static_cast( 0.25 ) ); + S0[1] = static_cast( 0.75 ) - delta2; + S0[2] = static_cast( 0.5 ) * ( delta2 + delta + static_cast( 0.25 ) ); + S0[3] = static_cast( 0.0 ) ; + } + + template + __device__ void inline __attribute__((always_inline)) init_S1(const ComputeFloat xpn, const int ipo, const int i_domain_begin, + ComputeFloat *__restrict__ S1) + { + // const int ip = static_cast( xpn + 0.5 ); // std::round | rounding approximation which is correct enough and faster in this case + const int ip = std::round( xpn ); + const int ip_m_ipo = ip - ipo - i_domain_begin; + const ComputeFloat delta = xpn - static_cast( ip ); + const ComputeFloat delta2 = delta * delta; + + S1[0] = static_cast( 0.0 ); + S1[1] = static_cast( 0.0 ); // S1[2] = 0.0; // Always set below + S1[3] = static_cast( 0.0 ); + S1[4] = static_cast( 0.0 ); + + S1[ip_m_ipo + 1] = static_cast( 0.5 ) * ( delta2 - delta + static_cast( 0.25 ) ); + S1[ip_m_ipo + 2] = static_cast( 0.75 ) - delta2; + S1[ip_m_ipo + 3] = static_cast( 0.5 ) * ( delta2 + delta + static_cast( 0.25 ) ); + } + + + template + __global__ void + // __launch_bounds__(kWorkgroupSize, 1) + DepositCurrentDensity_1D_Order2( double *__restrict__ device_Jx, + double *__restrict__ device_Jy, + double *__restrict__ device_Jz, + int Jx_size, + int Jy_size, + int Jz_size, + const double *__restrict__ device_particle_position_x, + const double *__restrict__ device_particle_momentum_y, + const double *__restrict__ device_particle_momentum_z, + const short *__restrict__ device_particle_charge, + const double *__restrict__ device_particle_weight, + const int *__restrict__ device_bin_index, + const double *__restrict__ device_invgf_, + const int *__restrict__ device_iold_, + const double *__restrict__ device_deltaold_, + ComputeFloat inv_cell_volume, + ComputeFloat dx_inv, + ComputeFloat dx_ov_dt, + int i_domain_begin, + int not_spectral_ ) + { + // TODO(Etienne M): refactor this function. Break it into smaller + // pieces (lds init/store, coeff computation, deposition etc..) + // TODO(Etienne M): __ldg could be used to slightly improve GDS load + // speed. This would only have an effect on Nvidia cards as this + // operation is a no op on AMD. + const unsigned int workgroup_size = kWorkgroupSize; // blockDim.x; + const unsigned int bin_count = gridDim.x; + const unsigned int loop_stride = workgroup_size; // This stride should enable better memory access coalescing + + const unsigned int x_cluster_coordinate = blockIdx.x; + const unsigned int workgroup_dedicated_bin_index = x_cluster_coordinate; + const unsigned int thread_index_offset = threadIdx.x; + + // The unit is the cell + const unsigned int global_x_scratch_space_coordinate_offset = x_cluster_coordinate * Params::getGPUClusterWidth( 1 /* 1D */ ); + const int GPUClusterWithGCWidth = Params::getGPUClusterWithGhostCellWidth( 1 /* 1D */, 2 /* 2nd order interpolation */ ); + + // NOTE: We gain from the particles not being sorted inside a + // cluster because it reduces the bank conflicts one gets when + // multiple threads access the same part of the shared memory. 
Such + // "conflicted" accesses are serialized ! + // NOTE: We use a bit to much LDS. For Jx, the first row could be + // discarded, for Jy we could remove the first column. + + static constexpr unsigned int kFieldScratchSpaceSize = Params::getGPUInterpolationClusterCellVolume( 1 /* 1D */, 2 /* 2nd order interpolation */ ); + + // kWorkgroupSize, bin_count, loop_stride, x_cluster_coordinate, workgroup_dedicated_bin_index, thread_index_offset, Params::getGPUClusterWidth(1), GPUClusterWithGCWidth, kFieldScratchSpaceSize, global_x_scratch_space_coordinate_offset); + // NOTE: I tried having only one cache and reusing it. Doing that + // requires you to iterate multiple time over the particle which is + // possible but cost more bandwidth. The speedup was ~x0.92. + __shared__ ReductionFloat Jx_scratch_space[kFieldScratchSpaceSize]; + __shared__ ReductionFloat Jy_scratch_space[kFieldScratchSpaceSize]; + __shared__ ReductionFloat Jz_scratch_space[kFieldScratchSpaceSize]; + + // Init the shared memory + + for( unsigned int field_index = thread_index_offset; + field_index < kFieldScratchSpaceSize; + field_index += workgroup_size ) { + Jx_scratch_space[field_index] = static_cast( 0.0 ); + Jy_scratch_space[field_index] = static_cast( 0.0 ); + Jz_scratch_space[field_index] = static_cast( 0.0 ); + } + + __syncthreads(); + + const unsigned int particle_count = device_bin_index[bin_count - 1]; + + // This workgroup has to process distance(last_particle, + // first_particle) particles + const unsigned int first_particle = workgroup_dedicated_bin_index == 0 ? 0 : device_bin_index[workgroup_dedicated_bin_index - 1]; + const unsigned int last_particle = device_bin_index[workgroup_dedicated_bin_index]; + + for( unsigned int particle_index = first_particle + thread_index_offset; + particle_index < last_particle; + particle_index += loop_stride ) { + const ComputeFloat invgf = static_cast( device_invgf_[particle_index] ); + const int *const __restrict__ iold = &device_iold_[particle_index]; + const double *const __restrict__ deltaold = &device_deltaold_[particle_index]; + + ComputeFloat Sx0[5]; + ComputeFloat Sx1[5]; + + // Variable declaration & initialization + // Esirkepov's paper: https://arxiv.org/pdf/physics/9901047.pdf + + // Locate the particle on the primal grid at former time-step & calculate coeff. S0 + { + const ComputeFloat delta = deltaold[0 * particle_count]; + const ComputeFloat delta2 = delta * delta; + + Sx0[0] = static_cast( 0.0 ); + Sx0[1] = static_cast( 0.5 ) * ( delta2 - delta + static_cast( 0.25 ) ); + Sx0[2] = static_cast( 0.75 ) - delta2; + Sx0[3] = static_cast( 0.5 ) * ( delta2 + delta + static_cast( 0.25 ) ); + Sx0[4] = static_cast( 0.0 ); + } + //init_S0(deltaold[0 * particle_count], Sx0); + //init_S0(deltaold[1 * particle_count], Sy0); + + // Locate the particle on the primal grid at current time-step & calculate coeff. 
S1 + { + // const int ip = static_cast( xpn + 0.5 ); // std::round | rounding approximation which is correct enough and faster in this case + const ComputeFloat xpn = static_cast( device_particle_position_x[particle_index] ) * dx_inv; + const int ip = std::round( xpn ); + const int ipo = iold[0 * particle_count]; + const int ip_m_ipo = ip - ipo - i_domain_begin; + const ComputeFloat delta = xpn - static_cast( ip ); + const ComputeFloat delta2 = delta * delta; + + Sx1[0] = static_cast( 0.0 ); + Sx1[1] = static_cast( 0.0 ); + // Sx1[2] = 0.0; // Always set below + Sx1[3] = static_cast( 0.0 ); + Sx1[4] = static_cast( 0.0 ); + + Sx1[ip_m_ipo + 1] = static_cast( 0.5 ) * ( delta2 - delta + static_cast( 0.25 ) ); + Sx1[ip_m_ipo + 2] = static_cast( 0.75 ) - delta2; + Sx1[ip_m_ipo + 3] = static_cast( 0.5 ) * ( delta2 + delta + static_cast( 0.25 ) ); + } + + // (x,y,z) components of the current density for the macro-particle + const ComputeFloat charge_weight = inv_cell_volume * static_cast( device_particle_charge[particle_index] ) * static_cast( device_particle_weight[particle_index] ); + const ComputeFloat crx_p = charge_weight * dx_ov_dt; + const ComputeFloat cry_p = charge_weight * static_cast( device_particle_momentum_y[particle_index] ) * invgf; + const ComputeFloat crz_p = charge_weight * static_cast( device_particle_momentum_z[particle_index] ) * invgf; + + // This is the particle position as grid index + // This minus 2 come from the order 2 scheme, based on a 5 points stencil from -2 to +2. + const int ipo = iold[0 * particle_count] - + 2 /* Offset so we dont uses negative numbers in the loop */ - + global_x_scratch_space_coordinate_offset /* Offset to get cluster relative coordinates */; + + // Jx + ComputeFloat tmpJx[5]{}; + for( unsigned int i = 1; i < 5; ++i ) { + const int iloc = i + ipo; + tmpJx[i] = tmpJx[i-1] + crx_p * (Sx0[i-1] - Sx1[i-1]); + atomic::LDS::AddNoReturn( &Jx_scratch_space[iloc], static_cast( tmpJx[i] ) ); + } + + // Jy + for( unsigned int i = 0; i < 5; ++i ) { + const int iloc = i + ipo; + tmpJx[i] = cry_p * 0.5 * (Sx0[i] - Sx1[i]); + atomic::LDS::AddNoReturn( &Jy_scratch_space[iloc], static_cast( tmpJx[i] ) ); + } + + // Jz + for( unsigned int i = 0; i < 5; ++i ) { + const int iloc = i + ipo; + tmpJx[i] = crz_p * 0.5 * (Sx0[i] - Sx1[i]); + atomic::LDS::AddNoReturn( &Jz_scratch_space[iloc], static_cast( tmpJx[i] ) ); + } + } // particle_index + + __syncthreads(); + + for( unsigned int field_index = thread_index_offset; field_index < kFieldScratchSpaceSize; field_index += workgroup_size ) { + const unsigned int local_x_scratch_space_coordinate = field_index % GPUClusterWithGCWidth; // /GPUClusterWithGCWidth + const unsigned int global_x_scratch_space_coordinate = global_x_scratch_space_coordinate_offset + local_x_scratch_space_coordinate; + + const unsigned int global_memory_index = global_x_scratch_space_coordinate; + const unsigned int scratch_space_index = field_index; // local_x_scratch_space_coordinate * GPUClusterWithGCWidth + local_y_scratch_space_coordinate; + + // These atomics are basically free (very few of them). 
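The Jx loop above is the 1D Esirkepov update: the current on each cell face is a running sum of crx_p * (Sx0 - Sx1), which enforces the discrete continuity equation between the old and new charge densities. A standalone check of that property (unit charge weight, dx = dt = 1, and both shapes chosen arbitrarily for a particle that stays in its cell):

// continuity_check_sketch.cpp -- illustrative only.
#include <cstdio>

int main()
{
    const double crx_p = 1.0;                 // stands in for charge_weight * dx_ov_dt
    double S0[5] = {}, S1[5] = {};
    auto shape = []( double d, double *S ) {  // same quadratic weights as the kernel
        const double d2 = d * d;
        S[1] = 0.5 * ( d2 - d + 0.25 );
        S[2] = 0.75 - d2;
        S[3] = 0.5 * ( d2 + d + 0.25 );
    };
    shape( -0.1, S0 );   // old position
    shape(  0.1, S1 );   // new position (moved by +0.2 cells)

    // Same running sum as the Jx loop: J[i] lives on the left face of cell i.
    double J[6] = {};
    for( int i = 1; i < 6; ++i ) {
        J[i] = J[i-1] + crx_p * ( S0[i-1] - S1[i-1] );
    }
    // Discrete continuity, cell by cell: (rho_new - rho_old) + (J_right - J_left) = 0
    for( int i = 0; i < 5; ++i ) {
        std::printf( "cell %d : dQ + dJ = % .3e\n", i,
                     ( S1[i] - S0[i] ) + ( J[i+1] - J[i] ) );
    }
    std::printf( "current leaving the 5-cell stencil: % .3e\n", J[5] );
}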
+ atomic::GDS::AddNoReturn( &device_Jx[global_memory_index], static_cast( Jx_scratch_space[scratch_space_index] ) ); + atomic::GDS::AddNoReturn( &device_Jy[global_memory_index + not_spectral_ * global_x_scratch_space_coordinate], static_cast( Jy_scratch_space[scratch_space_index] ) ); // We handle the FTDT/picsar + atomic::GDS::AddNoReturn( &device_Jz[global_memory_index], static_cast( Jz_scratch_space[scratch_space_index] ) ); + } + } // end DepositCurrent + + + template + __global__ void + // __launch_bounds__(kWorkgroupSize, 1) + DepositCurrentAndDensity_1D_Order2( double *__restrict__ device_Jx, + double *__restrict__ device_Jy, + double *__restrict__ device_Jz, + double *__restrict__ device_rho, + int Jx_size, + int Jy_size, + int Jz_size, + int rho_size, + const double *__restrict__ device_particle_position_x, + const double *__restrict__ device_particle_momentum_y, + const double *__restrict__ device_particle_momentum_z, + const short *__restrict__ device_particle_charge, + const double *__restrict__ device_particle_weight, + const int *__restrict__ device_bin_index, + const double *__restrict__ device_invgf_, + const int *__restrict__ device_iold_, + const double *__restrict__ device_deltaold_, + ComputeFloat inv_cell_volume, + ComputeFloat dx_inv, + ComputeFloat dx_ov_dt, + int i_domain_begin, + int not_spectral_ ) + { + // TODO(Etienne M): refactor this function. Break it into smaller + // pieces (lds init/store, coeff computation, deposition etc..) + // TODO(Etienne M): __ldg could be used to slightly improve GDS load + // speed. This would only have an effect on Nvidia cards as this + // operation is a no op on AMD. + const unsigned int workgroup_size = kWorkgroupSize; // blockDim.x; + const unsigned int bin_count = gridDim.x; + const unsigned int loop_stride = workgroup_size; // This stride should enable better memory access coalescing + + const unsigned int x_cluster_coordinate = blockIdx.x; + const unsigned int workgroup_dedicated_bin_index = x_cluster_coordinate ; + const unsigned int thread_index_offset = threadIdx.x; + + // The unit is the cell + const unsigned int global_x_scratch_space_coordinate_offset = x_cluster_coordinate * Params::getGPUClusterWidth( 1 /* 1D */ ); + + // NOTE: We gain from the particles not being sorted inside a + // cluster because it reduces the bank conflicts one gets when + // multiple threads access the same part of the shared memory. Such + // "conflicted" accesses are serialized ! + // NOTE: We use a bit to much LDS. For Jx, the first row could be + // discarded, for Jy we could remove the first column. + + const int GPUClusterWithGCWidth = Params::getGPUClusterWithGhostCellWidth( 1 /* 1D */, 2 /* 2nd order interpolation */ ); + static constexpr unsigned int kFieldScratchSpaceSize = Params::getGPUInterpolationClusterCellVolume( 1 /* 1D */, 2 /* 2nd order interpolation */ ); + + // NOTE: I tried having only one cache and reusing it. Doing that + // requires you to iterate multiple time over the particle which is + // possible but cost more bandwidth. The speedup was ~x0.92. 
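Both kernels use the same cluster decomposition: one GPU block per particle bin, with the cumulative particle counts stored in device_bin_index, so each block recovers its particle range from at most two reads. A host-side sketch of that mapping (the bin contents below are invented for the example):

// bin_mapping_sketch.cpp -- illustrative only.
#include <cstdio>

int main()
{
    // Assumed layout, as described in the kernel comments: bin_index holds the
    // cumulative particle count per cluster, so the last entry is the total.
    const int bin_index[4] = { 3, 3, 8, 10 };   // cluster 1 is empty, 10 particles total
    const int bin_count    = 4;

    for( int block = 0; block < bin_count; ++block ) {   // one GPU block per cluster
        const int first = block == 0 ? 0 : bin_index[block - 1];
        const int last  = bin_index[block];
        std::printf( "block %d handles particles [%d, %d)\n", block, first, last );
        // In the kernel, the block's threads then stride over [first, last)
        // with particle_index = first + threadIdx.x, stepping by blockDim.x.
    }
}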
+ __shared__ ReductionFloat Jx_scratch_space[kFieldScratchSpaceSize]; + __shared__ ReductionFloat Jy_scratch_space[kFieldScratchSpaceSize]; + __shared__ ReductionFloat Jz_scratch_space[kFieldScratchSpaceSize]; + __shared__ ReductionFloat rho_scratch_space[kFieldScratchSpaceSize]; + + // Init the shared memory + + for( unsigned int field_index = thread_index_offset; + field_index < kFieldScratchSpaceSize; + field_index += workgroup_size ) { + Jx_scratch_space[field_index] = static_cast( 0.0 ); + Jy_scratch_space[field_index] = static_cast( 0.0 ); + Jz_scratch_space[field_index] = static_cast( 0.0 ); + rho_scratch_space[field_index] = static_cast( 0.0 ); + } + + __syncthreads(); + + const unsigned int particle_count = device_bin_index[bin_count - 1]; + + // This workgroup has to process distance(last_particle, + // first_particle) particles + const unsigned int first_particle = workgroup_dedicated_bin_index == 0 ? 0 : device_bin_index[workgroup_dedicated_bin_index - 1]; + const unsigned int last_particle = device_bin_index[workgroup_dedicated_bin_index]; + + for( unsigned int particle_index = first_particle + thread_index_offset; + particle_index < last_particle; + particle_index += loop_stride ) { + const ComputeFloat invgf = static_cast( device_invgf_[particle_index] ); + const int *const __restrict__ iold = &device_iold_[particle_index]; + const double *const __restrict__ deltaold = &device_deltaold_[particle_index]; + + ComputeFloat Sx0[5]; + ComputeFloat Sx1[5]; + + // Variable declaration & initialization + // Esirkepov's paper: https://arxiv.org/pdf/physics/9901047.pdf + + // Locate the particle on the primal grid at former time-step & calculate coeff. S0 + { + const ComputeFloat delta = deltaold[0 * particle_count]; + const ComputeFloat delta2 = delta * delta; + + Sx0[0] = static_cast( 0.0 ); + Sx0[1] = static_cast( 0.5 ) * ( delta2 - delta + static_cast( 0.25 ) ); + Sx0[2] = static_cast( 0.75 ) - delta2; + Sx0[3] = static_cast( 0.5 ) * ( delta2 + delta + static_cast( 0.25 ) ); + Sx0[4] = static_cast( 0.0 ); + } + + // Locate the particle on the primal grid at current time-step & calculate coeff. 
S1 + { + // const int ip = static_cast( xpn + 0.5 ); // std::round | rounding approximation which is correct enough and faster in this case + const ComputeFloat xpn = static_cast( device_particle_position_x[particle_index] ) * dx_inv; + const int ip = std::round( xpn ); + const int ipo = iold[0 * particle_count]; + const int ip_m_ipo = ip - ipo - i_domain_begin; + const ComputeFloat delta = xpn - static_cast( ip ); + const ComputeFloat delta2 = delta * delta; + + Sx1[0] = static_cast( 0.0 ); + Sx1[1] = static_cast( 0.0 ); + // Sx1[2] = 0.0; // Always set below + Sx1[3] = static_cast( 0.0 ); + Sx1[4] = static_cast( 0.0 ); + + Sx1[ip_m_ipo + 1] = static_cast( 0.5 ) * ( delta2 - delta + static_cast( 0.25 ) ); + Sx1[ip_m_ipo + 2] = static_cast( 0.75 ) - delta2; + Sx1[ip_m_ipo + 3] = static_cast( 0.5 ) * ( delta2 + delta + static_cast( 0.25 ) ); + } + + // (x,y,z) components of the current density for the macro-particle + const ComputeFloat charge_weight = inv_cell_volume * static_cast( device_particle_charge[particle_index] ) * static_cast( device_particle_weight[particle_index] ); + const ComputeFloat crx_p = charge_weight * dx_ov_dt; + const ComputeFloat cry_p = charge_weight * static_cast( device_particle_momentum_y[particle_index] ) * invgf; + const ComputeFloat crz_p = charge_weight * static_cast( device_particle_momentum_z[particle_index] ) * invgf; + + // This is the particle position as grid index + // This minus 2 come from the order 2 scheme, based on a 5 points stencil from -2 to +2. + const int ipo = iold[0 * particle_count] - + 2 /* Offset so we dont uses negative numbers in the loop */ - + global_x_scratch_space_coordinate_offset /* Offset to get cluster relative coordinates */; + + // Jx + ComputeFloat tmpJx[5]{}; + for( unsigned int i = 1; i < 5; ++i ) { + const int iloc = i + ipo; + tmpJx[i] = tmpJx[i-1] + crx_p * (Sx0[i-1] - Sx1[i-1]); + atomic::LDS::AddNoReturn( &Jx_scratch_space[iloc], static_cast( tmpJx[i] ) ); + } + + // Jy + for( unsigned int i = 0; i < 5; ++i ) { + const int iloc = i + ipo; + tmpJx[i] = cry_p * 0.5 * (Sx0[i] - Sx1[i]); + atomic::LDS::AddNoReturn( &Jy_scratch_space[iloc], static_cast( tmpJx[i] ) ); + } + + // Jz + for( unsigned int i = 0; i < 5; ++i ) { + const int iloc = i + ipo; + tmpJx[i] = crz_p * 0.5 * (Sx0[i] - Sx1[i]); + atomic::LDS::AddNoReturn( &Jz_scratch_space[iloc], static_cast( tmpJx[i] ) ); + } + + // Rho + for( unsigned int i = 0; i < 5; ++i ) { + const int iloc = i + ipo; + atomic::LDS::AddNoReturn( &rho_scratch_space[iloc], static_cast( charge_weight * Sx1[i] ) ); + } + + // improvements ideas: 1. unrolling to reduce the size of Sx0 and Sx1 + // 2. 
combine the loops + + /* + // + { + //ComputeFloat tmp = 0.5 * (Sx0[0] - Sx1[0]); // = - 0.5 * Sx1[0] + atomic::LDS::AddNoReturn( &Jy_scratch_space[ipo], static_cast( -cry_p * 0.5 * Sx1[0] ) ); + atomic::LDS::AddNoReturn( &Jz_scratch_space[ipo], static_cast( -crz_p * 0.5 * Sx1[0] ) ); + atomic::LDS::AddNoReturn( &rho_scratch_space[ipo], static_cast( charge_weight * Sx1[0] ) ); + }*/ + /*for( unsigned int i = 1; i < 4; ++i ) { + const int iloc = i + ipo; + tmpJx[i] = tmpJx[i-1] + crx_p * (Sx0[i-1] - Sx1[i-1]); + ComputeFloat tmp = 0.5 * (Sx0[i] - Sx1[i]); + atomic::LDS::AddNoReturn( &Jx_scratch_space[iloc], static_cast( tmpJx[i] ) ); + atomic::LDS::AddNoReturn( &Jy_scratch_space[iloc], static_cast( cry_p * tmp ) ); + atomic::LDS::AddNoReturn( &Jz_scratch_space[iloc], static_cast( crz_p * tmp ) ); + atomic::LDS::AddNoReturn( &rho_scratch_space[iloc], static_cast( charge_weight * Sx1[i] ) ); + }*/ + /* i=4 + { + const int iloc = i + ipo; + tmpJx[4] = tmpJx[3] + crx_p * (Sx0[i-1] - Sx1[i-1]); // can save some registers by tmpJx[0] instead of tmpJx[4] ? reducing its size from 5 to 4? + //ComputeFloat tmp = 0.5 * (Sx0[4] - Sx1[4]); // = -0.5 * Sx1[4] + atomic::LDS::AddNoReturn( &Jx_scratch_space[iloc], static_cast( tmpJx[i] ) ); + atomic::LDS::AddNoReturn( &Jy_scratch_space[iloc], static_cast( -cry_p * 0.5 * Sx1[4] ) ); //null + atomic::LDS::AddNoReturn( &Jz_scratch_space[iloc], static_cast( -crz_p * 0.5 * Sx1[4] ) ); //null + atomic::LDS::AddNoReturn( &rho_scratch_space[iloc], static_cast( charge_weight * Sx1[4] ) ); //null + } + + + */ + + } // particle_index + + __syncthreads(); + + for( unsigned int field_index = thread_index_offset; + field_index < kFieldScratchSpaceSize; + field_index += workgroup_size ) { + + const unsigned int local_x_scratch_space_coordinate = field_index % GPUClusterWithGCWidth; + const unsigned int global_x_scratch_space_coordinate = global_x_scratch_space_coordinate_offset + local_x_scratch_space_coordinate; + + const unsigned int global_memory_index = global_x_scratch_space_coordinate; + const unsigned int scratch_space_index = field_index; // local_x_scratch_space_coordinate * GPUClusterWithGCWidth + local_y_scratch_space_coordinate; + + // These atomics are basically free (very few of them). 
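Both deposition kernels follow the two-level reduction the comments above describe: accumulate into per-block __shared__ scratch arrays with shared-memory atomics, synchronize, then flush each scratch cell to global memory with a single global atomic. A heavily simplified CUDA sketch of that pattern (all names, sizes and the p % tile cell mapping are invented for illustration; it needs native double-precision atomicAdd, e.g. nvcc -arch=sm_60):

// lds_reduction_sketch.cu -- illustrative only.
#include <cstdio>
#include <cuda_runtime.h>

// One block owns one tile of the output array: threads first accumulate into
// fast shared memory, synchronize, then flush the tile with few global atomics.
__global__ void depositTiled( double *grid_J, const double *values, const int *counts, int tile )
{
    extern __shared__ double scratch[];
    for( int i = threadIdx.x; i < tile; i += blockDim.x ) scratch[i] = 0.0;
    __syncthreads();

    const int first = blockIdx.x == 0 ? 0 : counts[blockIdx.x - 1];
    const int last  = counts[blockIdx.x];
    for( int p = first + threadIdx.x; p < last; p += blockDim.x ) {
        atomicAdd( &scratch[p % tile], values[p] );          // shared-memory atomic
    }
    __syncthreads();

    for( int i = threadIdx.x; i < tile; i += blockDim.x ) {
        atomicAdd( &grid_J[blockIdx.x * tile + i], scratch[i] );   // global atomic
    }
}

int main()
{
    const int tile = 8, blocks = 2, n = 10;
    double *g, *v; int *c;
    cudaMallocManaged( &g, blocks * tile * sizeof( double ) );
    cudaMallocManaged( &v, n * sizeof( double ) );
    cudaMallocManaged( &c, blocks * sizeof( int ) );
    for( int i = 0; i < blocks * tile; ++i ) g[i] = 0.0;
    for( int i = 0; i < n; ++i ) v[i] = 1.0;
    c[0] = 6; c[1] = 10;                        // cumulative particle counts per block
    depositTiled<<<blocks, 64, tile * sizeof( double )>>>( g, v, c, tile );
    cudaDeviceSynchronize();
    std::printf( "g[0]=%g g[8]=%g\n", g[0], g[8] );
    cudaFree( g ); cudaFree( v ); cudaFree( c );
}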
+ atomic::GDS::AddNoReturn( &device_Jx[global_memory_index], static_cast( Jx_scratch_space[scratch_space_index] ) ); + atomic::GDS::AddNoReturn( &device_Jy[global_memory_index + /* We handle the FTDT/picsar */ not_spectral_ * global_x_scratch_space_coordinate], static_cast( Jy_scratch_space[scratch_space_index] ) ); + atomic::GDS::AddNoReturn( &device_Jz[global_memory_index], static_cast( Jz_scratch_space[scratch_space_index] ) ); + atomic::GDS::AddNoReturn( &device_rho[global_memory_index], static_cast( rho_scratch_space[scratch_space_index] ) ); + } + } + } // namespace kernel + + + //static inline + void + currentDepositionKernel1D( double *__restrict__ host_Jx, + double *__restrict__ host_Jy, + double *__restrict__ host_Jz, + int Jx_size, + int Jy_size, + int Jz_size, + const double *__restrict__ device_particle_position_x, + const double *__restrict__ device_particle_momentum_y, + const double *__restrict__ device_particle_momentum_z, + const short *__restrict__ device_particle_charge, + const double *__restrict__ device_particle_weight, + const int *__restrict__ host_bin_index, + unsigned int x_dimension_bin_count, + const double *__restrict__ host_invgf_, + const int *__restrict__ host_iold_, + const double *__restrict__ host_deltaold_, + double inv_cell_volume, + double dx_inv, + double dx_ov_dt, + int i_domain_begin, + int not_spectral_ ) + { + SMILEI_ASSERT( Params::getGPUClusterWidth( 1 /* 1D */ ) != -1 && + Params::getGPUClusterGhostCellBorderWidth( 2 /* 2nd order interpolation */ ) != -1 ); + + // NOTE: + // This cluster is very strongly bound by atomic operations in LDS (shared memory) + // TODO(Etienne M): Find a way to lessen the atomic usage + + const ::dim3 kGridDimension { static_cast( x_dimension_bin_count ), 1, 1 }; + + static constexpr std::size_t kWorkgroupSize = 128; + const ::dim3 kBlockDimension{ static_cast( kWorkgroupSize ), 1, 1 }; + + // NOTE: On cards lacking hardware backed Binary64 atomic operations, + // falling back to Binary32 (supposing hardware support for atomic + // operations) can lead to drastic performance improvement. + // One just need to assign 'float' to ReductionFloat. 
+ // + using ComputeFloat = double; + using ReductionFloat = double; + + auto KernelFunction = kernel::DepositCurrentDensity_1D_Order2; +#if defined ( __HIP__ ) + hipLaunchKernelGGL( KernelFunction, + kGridDimension, + kBlockDimension, + 0, // Shared memory + 0, // Stream + // Kernel arguments + smilei::tools::gpu::HostDeviceMemoryManagement::GetDevicePointer( host_Jx ), + smilei::tools::gpu::HostDeviceMemoryManagement::GetDevicePointer( host_Jy ), + smilei::tools::gpu::HostDeviceMemoryManagement::GetDevicePointer( host_Jz ), + Jx_size, Jy_size, Jz_size, + device_particle_position_x, + device_particle_momentum_y, + device_particle_momentum_z, + device_particle_charge, + device_particle_weight, + smilei::tools::gpu::HostDeviceMemoryManagement::GetDevicePointer( host_bin_index ), + smilei::tools::gpu::HostDeviceMemoryManagement::GetDevicePointer( host_invgf_ ), + smilei::tools::gpu::HostDeviceMemoryManagement::GetDevicePointer( host_iold_ ), + smilei::tools::gpu::HostDeviceMemoryManagement::GetDevicePointer( host_deltaold_ ), + inv_cell_volume, + dx_inv, + dx_ov_dt, + i_domain_begin, + not_spectral_ ); + + checkHIPErrors( ::hipDeviceSynchronize() ); +#elif defined ( __NVCC__ ) + KernelFunction <<< + kGridDimension, + kBlockDimension, + 0, // Shared memory + 0 // Stream + >>> + ( + smilei::tools::gpu::HostDeviceMemoryManagement::GetDevicePointer( host_Jx ), + smilei::tools::gpu::HostDeviceMemoryManagement::GetDevicePointer( host_Jy ), + smilei::tools::gpu::HostDeviceMemoryManagement::GetDevicePointer( host_Jz ), + Jx_size, Jy_size, Jz_size, + device_particle_position_x, + device_particle_momentum_y, + device_particle_momentum_z, + device_particle_charge, + device_particle_weight, + smilei::tools::gpu::HostDeviceMemoryManagement::GetDevicePointer( host_bin_index ), + smilei::tools::gpu::HostDeviceMemoryManagement::GetDevicePointer( host_invgf_ ), + smilei::tools::gpu::HostDeviceMemoryManagement::GetDevicePointer( host_iold_ ), + smilei::tools::gpu::HostDeviceMemoryManagement::GetDevicePointer( host_deltaold_ ), + inv_cell_volume, + dx_inv, + dx_ov_dt, + i_domain_begin, + not_spectral_ + ); + checkHIPErrors( ::cudaDeviceSynchronize() ); +#endif + } + + //static inline + void + currentAndDensityDepositionKernel1D( double *__restrict__ host_Jx, + double *__restrict__ host_Jy, + double *__restrict__ host_Jz, + double *__restrict__ host_rho, + int Jx_size, + int Jy_size, + int Jz_size, + int rho_size, + const double *__restrict__ device_particle_position_x, + const double *__restrict__ device_particle_momentum_y, + const double *__restrict__ device_particle_momentum_z, + const short *__restrict__ device_particle_charge, + const double *__restrict__ device_particle_weight, + const int *__restrict__ host_bin_index, + unsigned int x_dimension_bin_count, + const double *__restrict__ host_invgf_, + const int *__restrict__ host_iold_, + const double *__restrict__ host_deltaold_, + double inv_cell_volume, + double dx_inv, + double dx_ov_dt, + int i_domain_begin, + int not_spectral_ ) + { + // & because one 1D ; 2 because of 2nd order interpolation + SMILEI_ASSERT( Params::getGPUClusterWidth( 1 ) != -1 && + Params::getGPUClusterGhostCellBorderWidth( 2 ) != -1 ); + + const ::dim3 kGridDimension { static_cast( x_dimension_bin_count ), 1, 1 }; + + static constexpr std::size_t kWorkgroupSize = 128; + const ::dim3 kBlockDimension{ static_cast( kWorkgroupSize ), 1, 1 }; + + // NOTE: On cards lacking hardware backed Binary64 atomic operations, + // falling back to Binary32 (supposing hardware support for atomic + 
// operations) can lead to drastic performance improvement. + // One just need to assign 'float' to ReductionFloat. + // + using ComputeFloat = double; + using ReductionFloat = double; + auto KernelFunction = kernel::DepositCurrentAndDensity_1D_Order2; +#if defined ( __HIP__ ) + hipLaunchKernelGGL( KernelFunction, + kGridDimension, + kBlockDimension, + 0, // Shared memory + 0, // Stream + // Kernel arguments + smilei::tools::gpu::HostDeviceMemoryManagement::GetDevicePointer( host_Jx ), + smilei::tools::gpu::HostDeviceMemoryManagement::GetDevicePointer( host_Jy ), + smilei::tools::gpu::HostDeviceMemoryManagement::GetDevicePointer( host_Jz ), + smilei::tools::gpu::HostDeviceMemoryManagement::GetDevicePointer( host_rho ), + Jx_size, Jy_size, Jz_size, rho_size, + device_particle_position_x, + device_particle_momentum_y, + device_particle_momentum_z, + device_particle_charge, + device_particle_weight, + smilei::tools::gpu::HostDeviceMemoryManagement::GetDevicePointer( host_bin_index ), + smilei::tools::gpu::HostDeviceMemoryManagement::GetDevicePointer( host_invgf_ ), + smilei::tools::gpu::HostDeviceMemoryManagement::GetDevicePointer( host_iold_ ), + smilei::tools::gpu::HostDeviceMemoryManagement::GetDevicePointer( host_deltaold_ ), + inv_cell_volume, + dx_inv, + dx_ov_dt, + i_domain_begin, + not_spectral_ ); + + checkHIPErrors( ::hipDeviceSynchronize() ); +#elif defined ( __NVCC__ ) + KernelFunction <<< + kGridDimension, + kBlockDimension, + 0, // Shared memory + 0 // Stream + >>> + ( + smilei::tools::gpu::HostDeviceMemoryManagement::GetDevicePointer( host_Jx ), + smilei::tools::gpu::HostDeviceMemoryManagement::GetDevicePointer( host_Jy ), + smilei::tools::gpu::HostDeviceMemoryManagement::GetDevicePointer( host_Jz ), + smilei::tools::gpu::HostDeviceMemoryManagement::GetDevicePointer( host_rho ), + Jx_size, Jy_size, Jz_size, rho_size, + device_particle_position_x, + device_particle_momentum_y, + device_particle_momentum_z, + device_particle_charge, + device_particle_weight, + smilei::tools::gpu::HostDeviceMemoryManagement::GetDevicePointer( host_bin_index ), + smilei::tools::gpu::HostDeviceMemoryManagement::GetDevicePointer( host_invgf_ ), + smilei::tools::gpu::HostDeviceMemoryManagement::GetDevicePointer( host_iold_ ), + smilei::tools::gpu::HostDeviceMemoryManagement::GetDevicePointer( host_deltaold_ ), + inv_cell_volume, + dx_inv, + dx_ov_dt, + i_domain_begin, + not_spectral_ + ); + checkHIPErrors( ::cudaDeviceSynchronize() ); +#endif + } + +} // namespace cudahip1D + + diff --git a/src/Projector/Projector1D2OrderGPUKernelCUDAHIP.h b/src/Projector/Projector1D2OrderGPUKernelCUDAHIP.h new file mode 100755 index 000000000..f5e64e408 --- /dev/null +++ b/src/Projector/Projector1D2OrderGPUKernelCUDAHIP.h @@ -0,0 +1,71 @@ +//! 
HIP CUDA implementation + +#ifndef Projector1D2OrderGPUKernelCUDAHIP_H +#define Projector1D2OrderGPUKernelCUDAHIP_H + +#if defined( SMILEI_ACCELERATOR_GPU ) + +#if defined( __HIP__ ) + #include +#elif defined( __NVCC__ ) + #include + #include +#endif + +#include "Params.h" +#include "gpu.h" + +namespace cudahip1d { + +void currentDepositionKernel1D( double *__restrict__ host_Jx, + double *__restrict__ host_Jy, + double *__restrict__ host_Jz, + int Jx_size, + int Jy_size, + int Jz_size, + const double *__restrict__ device_particle_position_x, + const double *__restrict__ device_particle_momentum_y, + const double *__restrict__ device_particle_momentum_z, + const short *__restrict__ device_particle_charge, + const double *__restrict__ device_particle_weight, + const int *__restrict__ host_bin_index, + unsigned int x_dimension_bin_count, + const double *__restrict__ host_invgf_, + const int *__restrict__ host_iold_, + const double *__restrict__ host_deltaold_, + double inv_cell_volume, + double dx_inv, + double dx_ov_dt, + int i_domain_begin, + int not_spectral_ ); + +void currentAndDensityDepositionKernel1D( + double *__restrict__ host_Jx, + double *__restrict__ host_Jy, + double *__restrict__ host_Jz, + double *__restrict__ host_rho, + int Jx_size, + int Jy_size, + int Jz_size, + int rho_size, + const double *__restrict__ device_particle_position_x, + const double *__restrict__ device_particle_momentum_y, + const double *__restrict__ device_particle_momentum_z, + const short *__restrict__ device_particle_charge, + const double *__restrict__ device_particle_weight, + const int *__restrict__ host_bin_index, + unsigned int x_dimension_bin_count, + const double *__restrict__ host_invgf_, + const int *__restrict__ host_iold_, + const double *__restrict__ host_deltaold_, + double inv_cell_volume, + double dx_inv, + double dx_ov_dt, + int i_domain_begin, + int not_spectral_ ); + +} // namespace cudahip1d + +#endif +#endif + diff --git a/src/Projector/Projector1D4Order.cpp b/src/Projector/Projector1D4Order.cpp index e78ddea67..ea4eafa4a 100755 --- a/src/Projector/Projector1D4Order.cpp +++ b/src/Projector/Projector1D4Order.cpp @@ -19,11 +19,11 @@ Projector1D4Order::Projector1D4Order( Params ¶ms, Patch *patch ) : Projector1D( params, patch ) { dx_inv_ = 1.0/params.cell_length[0]; - dx_ov_dt = params.cell_length[0] / params.timestep; + dx_ov_dt_ = params.cell_length[0] / params.timestep; //double defined for use in coefficients - index_domain_begin = patch->getCellStartingGlobalIndex( 0 ); + i_domain_begin_ = patch->getCellStartingGlobalIndex( 0 ); DEBUG( "cell_length "<< params.cell_length[0] ); @@ -43,7 +43,7 @@ void Projector1D4Order::currents( double *Jx, double *Jy, double *Jz, Particles int ip_m_ipo; double charge_weight = inv_cell_volume * ( double )( particles.charge( ipart ) )*particles.weight( ipart ); double xjn, xj_m_xipo, xj_m_xipo2, xj_m_xipo3, xj_m_xipo4, xj_m_xip, xj_m_xip2, xj_m_xip3, xj_m_xip4; - double crx_p = charge_weight*dx_ov_dt; // current density for particle moving in the x-direction + double crx_p = charge_weight*dx_ov_dt_; // current density for particle moving in the x-direction double cry_p = charge_weight*particles.momentum( 1, ipart )*invgf; // current density in the y-direction of the macroparticle double crz_p = charge_weight*particles.momentum( 2, ipart )*invgf; // current density allow the y-direction of the macroparticle double S0[7], S1[7], Wl[7], Wt[7], Jx_p[7]; // arrays used for the Esirkepov projection method @@ -82,7 +82,7 @@ void Projector1D4Order::currents( 
double *Jx, double *Jy, double *Jz, Particles // coefficients 2nd order interpolation on 5 nodes ipo = *iold; // index of the central node - ip_m_ipo = ip-ipo-index_domain_begin; + ip_m_ipo = ip-ipo-i_domain_begin_; S1[ip_m_ipo+1] = dble_1_ov_384 - dble_1_ov_48 * xj_m_xip + dble_1_ov_16 * xj_m_xip2 - dble_1_ov_12 * xj_m_xip3 + dble_1_ov_24 * xj_m_xip4; S1[ip_m_ipo+2] = dble_19_ov_96 - dble_11_ov_24 * xj_m_xip + dble_1_ov_4 * xj_m_xip2 + dble_1_ov_6 * xj_m_xip3 - dble_1_ov_6 * xj_m_xip4; @@ -125,7 +125,7 @@ void Projector1D4Order::currentsAndDensity( double *Jx, double *Jy, double *Jz, int ip_m_ipo; double charge_weight = inv_cell_volume * ( double )( particles.charge( ipart ) )*particles.weight( ipart ); double xjn, xj_m_xipo, xj_m_xipo2, xj_m_xipo3, xj_m_xipo4, xj_m_xip, xj_m_xip2, xj_m_xip3, xj_m_xip4; - double crx_p = charge_weight*dx_ov_dt; // current density for particle moving in the x-direction + double crx_p = charge_weight*dx_ov_dt_; // current density for particle moving in the x-direction double cry_p = charge_weight*particles.momentum( 1, ipart )*invgf; // current density in the y-direction of the macroparticle double crz_p = charge_weight*particles.momentum( 2, ipart )*invgf; // current density allow the y-direction of the macroparticle double S0[7], S1[7], Wl[7], Wt[7], Jx_p[7]; // arrays used for the Esirkepov projection method @@ -164,7 +164,7 @@ void Projector1D4Order::currentsAndDensity( double *Jx, double *Jy, double *Jz, // coefficients 2nd order interpolation on 5 nodes ipo = *iold; // index of the central node - ip_m_ipo = ip-ipo-index_domain_begin; + ip_m_ipo = ip-ipo-i_domain_begin_; S1[ip_m_ipo+1] = dble_1_ov_384 - dble_1_ov_48 * xj_m_xip + dble_1_ov_16 * xj_m_xip2 - dble_1_ov_12 * xj_m_xip3 + dble_1_ov_24 * xj_m_xip4; S1[ip_m_ipo+2] = dble_19_ov_96 - dble_11_ov_24 * xj_m_xip + dble_1_ov_4 * xj_m_xip2 + dble_1_ov_6 * xj_m_xip3 - dble_1_ov_6 * xj_m_xip4; @@ -253,7 +253,7 @@ void Projector1D4Order::basic( double *rhoj, Particles &particles, unsigned int S1[4] = dble_19_ov_96 + dble_11_ov_24 * xj_m_xip + dble_1_ov_4 * xj_m_xip2 - dble_1_ov_6 * xj_m_xip3 - dble_1_ov_6 * xj_m_xip4; S1[5] = dble_1_ov_384 + dble_1_ov_48 * xj_m_xip + dble_1_ov_16 * xj_m_xip2 + dble_1_ov_12 * xj_m_xip3 + dble_1_ov_24 * xj_m_xip4; - ip -= index_domain_begin + 3 + bin_shift ; + ip -= i_domain_begin_ + 3 + bin_shift ; // 4th order projection for the charge density // At the 4th order, oversize = 3. 
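For reference, the Projector1D4Order hunks above only rename members (dx_ov_dt to dx_ov_dt_, index_domain_begin to i_domain_begin_); the 4th-order weights themselves are unchanged. They span five primal nodes and, like the 2nd-order ones, sum to one. A quick standalone check (the centre coefficient below is the standard quartic B-spline value; it sits outside the lines quoted in this hunk and is assumed here):

// shape_order4_sketch.cpp -- illustrative only.
#include <cstdio>

int main()
{
    const double d = 0.31, d2 = d*d, d3 = d2*d, d4 = d2*d2;   // arbitrary offset in [-0.5, 0.5]

    // The five 4th-order weights; the outer four match the dble_* constants
    // used by Projector1D4Order.
    const double S[5] = {
        1.0/384.0  - d/48.0       + d2/16.0    - d3/12.0 + d4/24.0,
        19.0/96.0  - 11.0*d/24.0  + d2/4.0     + d3/6.0  - d4/6.0,
        115.0/192.0               - 5.0*d2/8.0            + d4/4.0,
        19.0/96.0  + 11.0*d/24.0  + d2/4.0     - d3/6.0  - d4/6.0,
        1.0/384.0  + d/48.0       + d2/16.0    + d3/12.0 + d4/24.0,
    };

    double sum = 0.0;
    for( double s : S ) sum += s;
    std::printf( "sum of 4th-order weights = %.15f\n", sum );   // 1 up to rounding
}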
@@ -299,7 +299,7 @@ void Projector1D4Order::ionizationCurrents( Field *Jx, Field *Jy, Field *Jz, Par xjmxi3 = xjmxi2*xjmxi; // cube xjmxi4 = xjmxi2*xjmxi2; // fourth-power - i -= index_domain_begin; + i -= i_domain_begin_; im2 = i-2; im1 = i-1; ip1 = i+1; @@ -326,7 +326,7 @@ void Projector1D4Order::ionizationCurrents( Field *Jx, Field *Jy, Field *Jz, Par xjmxi = xjn - ( double )i; // normalized distance to the nearest grid point xjmxi2 = xjmxi*xjmxi; // square of the normalized distance to the nearest grid point - i -= index_domain_begin; + i -= i_domain_begin_; im2 = i-2; im1 = i-1; ip1 = i+1; @@ -476,7 +476,7 @@ void Projector1D4Order::ionizationCurrentsForTasks( double *b_Jx, double *b_Jy, Sxd[3] = dble_19_ov_96 + dble_11_ov_24 * xpmxid + dble_1_ov_4 * xpmxid2 - dble_1_ov_6 * xpmxid3 - dble_1_ov_6 * xpmxid4; Sxd[4] = dble_1_ov_384 + dble_1_ov_48 * xpmxid + dble_1_ov_16 * xpmxid2 + dble_1_ov_12 * xpmxid3 + dble_1_ov_24 * xpmxid4; - ip -= index_domain_begin+bin_shift; + ip -= i_domain_begin_+bin_shift; // id -= i_domain_begin; for (unsigned int i=0 ; i<5 ; i++) { diff --git a/src/Projector/Projector1D4Order.h b/src/Projector/Projector1D4Order.h index 6cd570d62..3ef38a7c7 100755 --- a/src/Projector/Projector1D4Order.h +++ b/src/Projector/Projector1D4Order.h @@ -33,7 +33,6 @@ class Projector1D4Order : public Projector1D void susceptibility( ElectroMagn *EMfields, Particles &particles, double species_mass, SmileiMPI *smpi, int istart, int iend, int ithread, int icell = 0, int ipart_ref = 0 ) override final; private: - double dx_ov_dt; static constexpr double dble_1_ov_384 = 1.0/384.0; static constexpr double dble_1_ov_48 = 1.0/48.0 ; static constexpr double dble_1_ov_16 = 1.0/16.0 ; diff --git a/src/Projector/Projector2D2OrderGPU.cpp b/src/Projector/Projector2D2OrderGPU.cpp index cfe20eb7d..a91a29dde 100755 --- a/src/Projector/Projector2D2OrderGPU.cpp +++ b/src/Projector/Projector2D2OrderGPU.cpp @@ -21,12 +21,12 @@ Projector2D2OrderGPU::Projector2D2OrderGPU( Params ¶meters, Patch *a_patch ) // initialize it's member variable) we better initialize // Projector2D2OrderGPU's member variable after explicitly initializing // Projector2D. - not_spectral = !parameters.is_pxr; + not_spectral_ = !parameters.is_pxr; dt = parameters.timestep; dts2 = dt / 2.0; dts4 = dts2 / 2.0; -#if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined ( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined ( SMILEI_ACCELERATOR_GPU_OACC ) // When sorting is disabled, these values are invalid (-1) and the HIP // implementation can't be used. 
x_dimension_bin_count_ = parameters.getGPUBinCount( 1 ); @@ -41,7 +41,7 @@ Projector2D2OrderGPU::~Projector2D2OrderGPU() // EMPTY } -#if defined( SMILEI_ACCELERATOR_MODE ) //SMILEI_ACCELERATOR_GPU_OMP ) +#if defined( SMILEI_ACCELERATOR_GPU ) //SMILEI_ACCELERATOR_GPU_OMP ) extern "C" void currentDepositionKernel2DOnDevice( double *__restrict__ Jx, @@ -72,7 +72,7 @@ currentDepositionKernel2DOnDevice( double *__restrict__ Jx, int not_spectral ); extern "C" void -currentAndDensityDepositionKernelOnDevice( double *__restrict__ Jx, +currentAndDensityDepositionKernel2DOnDevice( double *__restrict__ Jx, double *__restrict__ Jy, double *__restrict__ Jz, double *__restrict__ rho, @@ -109,6 +109,7 @@ namespace { // Unnamed namespace == static == internal linkage == no exported sy /// Project global current densities (EMfields->Jx_/Jy_/Jz_) /// /* inline */ void +#if defined( SMILEI_ACCELERATOR_GPU )//SMILEI_ACCELERATOR_GPU_OMP ) currents( double *__restrict__ Jx, double *__restrict__ Jy, double *__restrict__ Jz, @@ -132,7 +133,6 @@ namespace { // Unnamed namespace == static == internal linkage == no exported sy double, int not_spectral ) { -#if defined( SMILEI_ACCELERATOR_MODE )//SMILEI_ACCELERATOR_GPU_OMP ) currentDepositionKernel2DOnDevice( Jx, Jy, Jz, @@ -159,15 +159,22 @@ namespace { // Unnamed namespace == static == internal linkage == no exported sy j_domain_begin, nprimy, not_spectral ); + } #else + currents( double *__restrict__ , double *__restrict__ , double *__restrict__ , int, int, int, + Particles &, unsigned int , unsigned int ,const double *__restrict__ , + const int *__restrict__ , const double *__restrict__ , double , double , double , + double , double , int , int , int , double, int ) + { SMILEI_ASSERT( false ); -#endif } +#endif /// Like currents(), project the particle current on the grid (Jx_/Jy_/Jz_) /// but also compute global current densities rho used for diagFields timestep /// /* inline */ void +#if defined( SMILEI_ACCELERATOR_GPU )//SMILEI_ACCELERATOR_GPU_OMP ) currentsAndDensity( double *__restrict__ Jx, double *__restrict__ Jy, double *__restrict__ Jz, @@ -193,8 +200,7 @@ namespace { // Unnamed namespace == static == internal linkage == no exported sy double, int not_spectral ) { -#if defined( SMILEI_ACCELERATOR_MODE )//SMILEI_ACCELERATOR_GPU_OMP ) - currentAndDensityDepositionKernelOnDevice( Jx, + currentAndDensityDepositionKernel2DOnDevice( Jx, Jy, Jz, rho, @@ -222,10 +228,16 @@ namespace { // Unnamed namespace == static == internal linkage == no exported sy j_domain_begin, nprimy, not_spectral ); + } #else + currentsAndDensity( double *__restrict__ , double *__restrict__ , double *__restrict__ , double *__restrict__ , + int , int , int , int , Particles &, unsigned int , unsigned int , + const double *__restrict__ , const int *__restrict__ , const double *__restrict__ , + double , double , double , double , double , int , int , int , double, int ) + { SMILEI_ASSERT( false ); -#endif } +#endif } // namespace @@ -233,7 +245,7 @@ void Projector2D2OrderGPU::basic( double *rhoj, Particles &particles, unsigned int ipart, unsigned int type, - int bin_shift ) + int /*bin_shift*/ ) { // Warning : this function is used for frozen species only. It is assumed that position = position_old !!! 
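The restructuring of the currents() / currentsAndDensity() helpers above moves the #if guard outside the whole function: a GPU build compiles the real body, while every other build compiles a stub with the same signature that only asserts, so call sites keep compiling and linking. A minimal sketch of that pattern (macro and function names are placeholders, not the Smilei ones):

// guard_pattern_sketch.cpp -- illustrative only.
#include <cassert>

#if defined( EXAMPLE_ACCELERATOR_GPU )
static void deposit( double *J, int size )
{
    for( int i = 0; i < size; ++i ) J[i] += 1.0;   // stands in for the device kernel launch
}
#else
static void deposit( double *, int )
{
    assert( false && "GPU-only code path reached in a CPU build" );
}
#endif

int main()
{
    double J[8] = {};
    deposit( J, 8 );   // in a CPU build this trips the assert, by design
}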
@@ -306,12 +318,12 @@ void Projector2D2OrderGPU::basic( double *rhoj, } } -void Projector2D2OrderGPU::ionizationCurrents( Field *Jx, - Field *Jy, - Field *Jz, - Particles &particles, - int ipart, - LocalFields Jion ) +void Projector2D2OrderGPU::ionizationCurrents( Field */*Jx*/, + Field */*Jy*/, + Field */*Jz*/, + Particles &/*particles*/, + int /*ipart*/, + LocalFields /*Jion */) { ERROR( "Projector2D2OrderGPU::ionizationCurrents(): Not implemented !" ); } @@ -325,8 +337,8 @@ void Projector2D2OrderGPU::currentsAndDensityWrapper( ElectroMagn *EMfields, bool diag_flag, bool is_spectral, int ispec, - int icell, - int ipart_ref ) + int /*icell*/, + int /*ipart_ref */) { std::vector &iold = smpi->dynamics_iold[ithread]; std::vector &delta = smpi->dynamics_deltaold[ithread]; @@ -368,7 +380,7 @@ void Projector2D2OrderGPU::currentsAndDensityWrapper( ElectroMagn *EMfields, // i_domain_begin_, j_domain_begin_, // nprimy, // one_third, - // not_spectral ); + // not_spectral_ ); // } // Does not compute Rho ! @@ -385,7 +397,7 @@ void Projector2D2OrderGPU::currentsAndDensityWrapper( ElectroMagn *EMfields, i_domain_begin_, j_domain_begin_, nprimy, one_third, - not_spectral ); + not_spectral_ ); } else { // If no field diagnostics this timestep, then the projection is done directly on the total arrays @@ -401,7 +413,7 @@ void Projector2D2OrderGPU::currentsAndDensityWrapper( ElectroMagn *EMfields, // i_domain_begin_, j_domain_begin_, // nprimy, // one_third, - // not_spectral ); + // not_spectral_ ); // } } else { @@ -420,25 +432,25 @@ void Projector2D2OrderGPU::currentsAndDensityWrapper( ElectroMagn *EMfields, i_domain_begin_, j_domain_begin_, nprimy, one_third, - not_spectral ); + not_spectral_ ); } } } -void Projector2D2OrderGPU::susceptibility( ElectroMagn *EMfields, - Particles &particles, - double species_mass, - SmileiMPI *smpi, - int istart, - int iend, - int ithread, - int icell, - int ipart_ref ) +void Projector2D2OrderGPU::susceptibility( ElectroMagn */*EMfields*/, + Particles &/*particles*/, + double /*species_mass*/, + SmileiMPI */*smpi*/, + int /*istart*/, + int /*iend*/, + int /*ithread*/, + int /*icell*/, + int /*ipart_ref */) { ERROR( "Projector2D2OrderGPU::susceptibility(): Not implemented !" ); } -//#if defined( SMILEI_ACCELERATOR_MODE ) +//#if defined( SMILEI_ACCELERATOR_GPU ) ////! Project global current densities (EMfields->Jx_/Jy_/Jz_) ////! 
//extern "C" void @@ -467,7 +479,7 @@ void Projector2D2OrderGPU::susceptibility( ElectroMagn *EMfields, // int i_domain_begin, // int j_domain_begin, // int nprimy, -// int not_spectral ) +// int not_spectral_ ) //{ // #if defined( PRIVATE_SMILEI_USE_OPENMP_PROJECTION_IMPLEMENTATION ) // naive:: // the naive, OMP version serves as a reference along with the CPU version @@ -490,7 +502,7 @@ void Projector2D2OrderGPU::susceptibility( ElectroMagn *EMfields, // dx_ov_dt, dy_ov_dt, // i_domain_begin, j_domain_begin, // nprimy, -// not_spectral ); +// not_spectral_ ); //} // // @@ -524,7 +536,7 @@ void Projector2D2OrderGPU::susceptibility( ElectroMagn *EMfields, // int i_domain_begin, // int j_domain_begin, // int nprimy, -// int not_spectral ) +// int not_spectral_ ) //{ // #if defined( PRIVATE_SMILEI_USE_OPENMP_PROJECTION_IMPLEMENTATION ) // naive:: // the naive, OMP version serves as a reference along with the CPU version @@ -547,7 +559,7 @@ void Projector2D2OrderGPU::susceptibility( ElectroMagn *EMfields, // dx_ov_dt, dy_ov_dt, // i_domain_begin, j_domain_begin, // nprimy, -// not_spectral ); +// not_spectral_ ); //} //#endif diff --git a/src/Projector/Projector2D2OrderGPU.h b/src/Projector/Projector2D2OrderGPU.h index 9a799f9b5..5e555b8f2 100755 --- a/src/Projector/Projector2D2OrderGPU.h +++ b/src/Projector/Projector2D2OrderGPU.h @@ -46,21 +46,21 @@ class Projector2D2OrderGPU : public Projector2D int ipart_ref = 0 ) override; //!Wrapper for task-based implementation of Smilei - void currentsAndDensityWrapperOnBuffers( double *b_Jx, - double *b_Jy, - double *b_Jz, - double *b_rho, - int bin_width, - Particles &particles, - SmileiMPI *smpi, - int istart, - int iend, - int ithread, - bool diag_flag, - bool is_spectral, - int ispec, - int icell = 0, - int ipart_ref = 0 ) override {}; + void currentsAndDensityWrapperOnBuffers( double * /*b_Jx*/, + double * /*b_Jy*/, + double * /*b_Jz*/, + double * /*b_rho*/, + int /*bin_width*/, + Particles &/*particles*/, + SmileiMPI */*smpi*/, + int /*istart*/, + int /*iend*/, + int /*ithread*/, + bool /*diag_flag*/, + bool /*is_spectral*/, + int /*ispec*/, + int /*icell*/ = 0, + int /*ipart_ref*/ = 0 ) override {}; /// Project susceptibility, used as source term in envelope equation /// @@ -78,7 +78,7 @@ class Projector2D2OrderGPU : public Projector2D double dt; double dts2; double dts4; - int not_spectral; + int not_spectral_; unsigned int x_dimension_bin_count_; unsigned int y_dimension_bin_count_; }; diff --git a/src/Projector/Projector2D2OrderGPUKernel.cpp b/src/Projector/Projector2D2OrderGPUKernel.cpp old mode 100644 new mode 100755 index 8f38f52fe..85814d54c --- a/src/Projector/Projector2D2OrderGPUKernel.cpp +++ b/src/Projector/Projector2D2OrderGPUKernel.cpp @@ -1,4 +1,4 @@ -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) #include "Projector2D2OrderGPUKernelCUDAHIP.h" #include @@ -33,7 +33,7 @@ currentDepositionKernel2DOnDevice( double *__restrict__ host_Jx, int i_domain_begin, int j_domain_begin, int nprimy, - int not_spectral ) + int not_spectral_ ) { //#if defined( PRIVATE_SMILEI_USE_OPENMP_PROJECTION_IMPLEMENTATION ) //naive:: // the naive, OMP version serves as a reference along with the CPU version @@ -56,14 +56,14 @@ currentDepositionKernel2DOnDevice( double *__restrict__ host_Jx, dx_ov_dt, dy_ov_dt, i_domain_begin, j_domain_begin, nprimy, - not_spectral ); + not_spectral_ ); } //! Project global current and charge densities (EMfields->Jx_/Jy_/Jz_/rho_) //! 
extern "C" void -currentAndDensityDepositionKernelOnDevice( double *__restrict__ host_Jx, +currentAndDensityDepositionKernel2DOnDevice( double *__restrict__ host_Jx, double *__restrict__ host_Jy, double *__restrict__ host_Jz, double *__restrict__ host_rho, @@ -90,14 +90,14 @@ currentAndDensityDepositionKernelOnDevice( double *__restrict__ host_Jx, int i_domain_begin, int j_domain_begin, int nprimy, - int not_spectral ) + int not_spectral_ ) { //#if defined( PRIVATE_SMILEI_USE_OPENMP_PROJECTION_IMPLEMENTATION ) //naive:: // the naive, OMP version serves as a reference along with the CPU version //#else cudahip2d:: //#endif - currentAndDensityDepositionKernel( host_Jx, host_Jy, host_Jz, host_rho, + currentAndDensityDepositionKernel2D( host_Jx, host_Jy, host_Jz, host_rho, Jx_size, Jy_size, Jz_size, rho_size, device_particle_position_x, device_particle_position_y, device_particle_momentum_z, @@ -113,7 +113,7 @@ currentAndDensityDepositionKernelOnDevice( double *__restrict__ host_Jx, dx_ov_dt, dy_ov_dt, i_domain_begin, j_domain_begin, nprimy, - not_spectral ); + not_spectral_ ); } #endif diff --git a/src/Projector/Projector2D2OrderGPUKernelCUDAHIP.cu b/src/Projector/Projector2D2OrderGPUKernelCUDAHIP.cu old mode 100644 new mode 100755 index 666a409f4..7c177c206 --- a/src/Projector/Projector2D2OrderGPUKernelCUDAHIP.cu +++ b/src/Projector/Projector2D2OrderGPUKernelCUDAHIP.cu @@ -20,20 +20,20 @@ #if defined( __HIP__ ) // HIP compiler support enabled (for .cu files) - #else - #define PRIVATE_SMILEI_USE_OPENMP_PROJECTION_IMPLEMENTATION 1 - #endif +#else + #define PRIVATE_SMILEI_USE_OPENMP_PROJECTION_IMPLEMENTATION 1 +#endif - #if defined( PRIVATE_SMILEI_USE_OPENMP_PROJECTION_IMPLEMENTATION ) - #include +#if defined( PRIVATE_SMILEI_USE_OPENMP_PROJECTION_IMPLEMENTATION ) + #include - #include "Tools.h" - #else - #include + #include "Tools.h" +#else + #include - #include "Params.h" - #include "gpu.h" - #endif + #include "Params.h" + #include "gpu.h" +#endif // #if defined( PRIVATE_SMILEI_USE_OPENMP_PROJECTION_IMPLEMENTATION ) @@ -65,7 +65,7 @@ // int i_domain_begin, // int j_domain_begin, // int nprimy, -// int not_spectral ) +// int not_spectral_ ) // { // // The OMP implementation is NOT bin aware. As per the precondition on // // host_bin_index, index zero always contains the number of particles. 
@@ -81,7 +81,7 @@ // device_particle_charge /* [0:particle_count] */, \ // device_particle_weight /* [0:particle_count] */ ) // #pragma omp teams thread_limit( 64 ) distribute parallel for -// #elif defined( SMILEI_OPENACC_MODE ) +// #elif defined( SMILEI_ACCELERATOR_GPU_OACC ) // #pragma acc parallel \ // deviceptr( device_particle_position_x, \ // device_particle_position_y, \ @@ -185,7 +185,7 @@ // tmp -= cry_p * ( Sy1[j - 1] - Sy0[j - 1] ) * ( Sx0[i] + 0.5 * ( Sx1[i] - Sx0[i] ) ); // // SMILEI_ACCELERATOR_ATOMIC -// Jy[iloc + j + not_spectral * ( /* i + */ ipo )] += tmp; +// Jy[iloc + j + not_spectral_ * ( /* i + */ ipo )] += tmp; // // SMILEI_ACCELERATOR_ATOMIC // Jz[iloc + j] += crz_p * ( Sy0[j] * ( 0.5 * Sx1[i] /* + Sx0[i] */ ) + @@ -209,7 +209,7 @@ // Jx[iloc + j] += tmpJx[j]; // tmp -= cry_p * ( Sy1[j - 1] - Sy0[j - 1] ) * ( Sx0[i] + 0.5 * ( Sx1[i] - Sx0[i] ) ); // SMILEI_ACCELERATOR_ATOMIC -// Jy[iloc + j + not_spectral * ( i + ipo )] += tmp; +// Jy[iloc + j + not_spectral_ * ( i + ipo )] += tmp; // // SMILEI_ACCELERATOR_ATOMIC // Jz[iloc + j] += crz_p * ( Sy0[j] * ( 0.5 * Sx1[i] + Sx0[i] ) + @@ -248,7 +248,7 @@ // int i_domain_begin, // int j_domain_begin, // int nprimy, -// int not_spectral ) +// int not_spectral_ ) // { // // The OMP implementation is NOT bin aware. As per the precondition on // // host_bin_index, index zero always contains the number of particles. @@ -264,7 +264,7 @@ // device_particle_charge /* [0:particle_count] */, \ // device_particle_weight /* [0:particle_count] */ ) // #pragma omp teams thread_limit( 64 ) distribute parallel for -// #elif defined( SMILEI_OPENACC_MODE ) +// #elif defined( SMILEI_ACCELERATOR_GPU_OACC ) // #pragma acc parallel \ // deviceptr( device_particle_position_x, \ // device_particle_position_y, \ @@ -372,7 +372,7 @@ // tmp -= cry_p * ( Sy1[j - 1] - Sy0[j - 1] ) * ( Sx0[i] + 0.5 * ( Sx1[i] - Sx0[i] ) ); // // SMILEI_ACCELERATOR_ATOMIC -// Jy[iloc + j + not_spectral * ( /* i + */ ipo )] += tmp; +// Jy[iloc + j + not_spectral_ * ( /* i + */ ipo )] += tmp; // // SMILEI_ACCELERATOR_ATOMIC // Jz[iloc + j] += crz_p * ( Sy0[j] * ( 0.5 * Sx1[i] /* + Sx0[i] */ ) + @@ -407,7 +407,7 @@ // tmp -= cry_p * ( Sy1[j - 1] - Sy0[j - 1] ) * ( Sx0[i] + 0.5 * ( Sx1[i] - Sx0[i] ) ); // // SMILEI_ACCELERATOR_ATOMIC -// Jy[iloc + j + not_spectral * ( i + ipo )] += tmp; +// Jy[iloc + j + not_spectral_ * ( i + ipo )] += tmp; // // SMILEI_ACCELERATOR_ATOMIC // Jz[iloc + j] += crz_p * ( Sy0[j] * ( 0.5 * Sx1[i] + Sx0[i] ) + @@ -567,7 +567,7 @@ namespace cudahip2d { int i_domain_begin, int j_domain_begin, int nprimy, - int not_spectral ) + int not_spectral_ ) { // TODO(Etienne M): refactor this function. Break it into smaller // pieces (lds init/store, coeff computation, deposition etc..) @@ -867,7 +867,7 @@ namespace cudahip2d { // These atomics are basically free (very few of them). 
atomic::GDS::AddNoReturn( &device_Jx[global_memory_index], static_cast( Jx_scratch_space[scratch_space_index] ) ); - atomic::GDS::AddNoReturn( &device_Jy[global_memory_index + /* We handle the FTDT/picsar */ not_spectral * global_x_scratch_space_coordinate], static_cast( Jy_scratch_space[scratch_space_index] ) ); + atomic::GDS::AddNoReturn( &device_Jy[global_memory_index + /* We handle the FTDT/picsar */ not_spectral_ * global_x_scratch_space_coordinate], static_cast( Jy_scratch_space[scratch_space_index] ) ); atomic::GDS::AddNoReturn( &device_Jz[global_memory_index], static_cast( Jz_scratch_space[scratch_space_index] ) ); } } // end DepositCurrent @@ -903,7 +903,7 @@ namespace cudahip2d { int i_domain_begin, int j_domain_begin, int nprimy, - int not_spectral ) + int not_spectral_ ) { // TODO(Etienne M): refactor this function. Break it into smaller // pieces (lds init/store, coeff computation, deposition etc..) @@ -1146,7 +1146,7 @@ namespace cudahip2d { // These atomics are basically free (very few of them). atomic::GDS::AddNoReturn( &device_Jx[global_memory_index], static_cast( Jx_scratch_space[scratch_space_index] ) ); - atomic::GDS::AddNoReturn( &device_Jy[global_memory_index + /* We handle the FTDT/picsar */ not_spectral * global_x_scratch_space_coordinate], static_cast( Jy_scratch_space[scratch_space_index] ) ); + atomic::GDS::AddNoReturn( &device_Jy[global_memory_index + /* We handle the FTDT/picsar */ not_spectral_ * global_x_scratch_space_coordinate], static_cast( Jy_scratch_space[scratch_space_index] ) ); atomic::GDS::AddNoReturn( &device_Jz[global_memory_index], static_cast( Jz_scratch_space[scratch_space_index] ) ); atomic::GDS::AddNoReturn( &device_rho[global_memory_index], static_cast( rho_scratch_space[scratch_space_index] ) ); } @@ -1181,7 +1181,7 @@ namespace cudahip2d { int i_domain_begin, int j_domain_begin, int nprimy, - int not_spectral ) + int not_spectral_ ) { SMILEI_ASSERT( Params::getGPUClusterWidth( 2 /* 2D */ ) != -1 && Params::getGPUClusterGhostCellBorderWidth( 2 /* 2nd order interpolation */ ) != -1 ); @@ -1229,7 +1229,7 @@ namespace cudahip2d { dx_ov_dt, dy_ov_dt, i_domain_begin, j_domain_begin, nprimy, - not_spectral ); + not_spectral_ ); checkHIPErrors( ::hipDeviceSynchronize() ); #elif defined ( __NVCC__ ) @@ -1258,7 +1258,7 @@ namespace cudahip2d { dx_ov_dt, dy_ov_dt, i_domain_begin, j_domain_begin, nprimy, - not_spectral + not_spectral_ ); checkHIPErrors( ::cudaDeviceSynchronize() ); #endif @@ -1266,7 +1266,7 @@ namespace cudahip2d { //static inline void - currentAndDensityDepositionKernel( double *__restrict__ host_Jx, + currentAndDensityDepositionKernel2D( double *__restrict__ host_Jx, double *__restrict__ host_Jy, double *__restrict__ host_Jz, double *__restrict__ host_rho, @@ -1293,7 +1293,7 @@ namespace cudahip2d { int i_domain_begin, int j_domain_begin, int nprimy, - int not_spectral ) + int not_spectral_ ) { SMILEI_ASSERT( Params::getGPUClusterWidth( 2 /* 2D */ ) != -1 && Params::getGPUClusterGhostCellBorderWidth( 2 /* 2nd order interpolation */ ) != -1 ); @@ -1341,7 +1341,7 @@ namespace cudahip2d { dx_ov_dt, dy_ov_dt, i_domain_begin, j_domain_begin, nprimy, - not_spectral ); + not_spectral_ ); checkHIPErrors( ::hipDeviceSynchronize() ); #elif defined ( __NVCC__ ) @@ -1371,7 +1371,7 @@ namespace cudahip2d { dx_ov_dt, dy_ov_dt, i_domain_begin, j_domain_begin, nprimy, - not_spectral + not_spectral_ ); checkHIPErrors( ::cudaDeviceSynchronize() ); #endif @@ -1409,7 +1409,7 @@ namespace cudahip2d { // int i_domain_begin, // int j_domain_begin, // int 
nprimy, -// int not_spectral ) +// int not_spectral_ ) //{ // #if defined( PRIVATE_SMILEI_USE_OPENMP_PROJECTION_IMPLEMENTATION ) // naive:: // the naive, OMP version serves as a reference along with the CPU version @@ -1432,7 +1432,7 @@ namespace cudahip2d { // dx_ov_dt, dy_ov_dt, // i_domain_begin, j_domain_begin, // nprimy, -// not_spectral ); +// not_spectral_ ); //} // ////! Project global current and charge densities (EMfields->Jx_/Jy_/Jz_/rho_) @@ -1465,7 +1465,7 @@ namespace cudahip2d { // int i_domain_begin, // int j_domain_begin, // int nprimy, -// int not_spectral ) +// int not_spectral_ ) //{ // #if defined( PRIVATE_SMILEI_USE_OPENMP_PROJECTION_IMPLEMENTATION ) // naive:: // the naive, OMP version serves as a reference along with the CPU version @@ -1488,6 +1488,6 @@ namespace cudahip2d { // dx_ov_dt, dy_ov_dt, // i_domain_begin, j_domain_begin, // nprimy, -// not_spectral ); +// not_spectral_ ); //} diff --git a/src/Projector/Projector2D2OrderGPUKernelCUDAHIP.h b/src/Projector/Projector2D2OrderGPUKernelCUDAHIP.h old mode 100644 new mode 100755 index d607a4ab4..d789796ab --- a/src/Projector/Projector2D2OrderGPUKernelCUDAHIP.h +++ b/src/Projector/Projector2D2OrderGPUKernelCUDAHIP.h @@ -3,8 +3,7 @@ #ifndef Projector2D2OrderGPUKernelCUDAHIP_H #define Projector2D2OrderGPUKernelCUDAHIP_H - -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) #if defined( __HIP__ ) #include @@ -20,8 +19,7 @@ namespace cudahip2d { //static -void - currentDepositionKernel2D( double *__restrict__ host_Jx, +void currentDepositionKernel2D( double *__restrict__ host_Jx, double *__restrict__ host_Jy, double *__restrict__ host_Jz, int Jx_size, @@ -46,11 +44,10 @@ void int i_domain_begin, int j_domain_begin, int nprimy, - int not_spectral ); + int not_spectral_ ); //static -inline void - currentAndDensityDepositionKernel( +void currentAndDensityDepositionKernel2D( double *__restrict__ host_Jx, double *__restrict__ host_Jy, double *__restrict__ host_Jz, @@ -78,7 +75,7 @@ inline void int i_domain_begin, int j_domain_begin, int nprimy, - int not_spectral ); + int not_spectral_ ); } // namespace cudahip2d diff --git a/src/Projector/Projector3D2OrderGPU.cpp b/src/Projector/Projector3D2OrderGPU.cpp index 39342b204..f27d7b1e1 100755 --- a/src/Projector/Projector3D2OrderGPU.cpp +++ b/src/Projector/Projector3D2OrderGPU.cpp @@ -25,18 +25,18 @@ Projector3D2OrderGPU::Projector3D2OrderGPU( Params ¶meters, Patch *a_patch ) // initialize it's member variable) we better initialize // Projector2D2OrderGPU's member variable after explicitly initializing // Projector2D. - not_spectral = !parameters.is_pxr; + not_spectral_ = !parameters.is_pxr; dt = parameters.timestep; dts2 = dt / 2.0; dts4 = dts2 / 2.0; -#if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined ( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined ( SMILEI_ACCELERATOR_GPU_OACC ) // When sorting is disabled, these values are invalid (-1) and the HIP // implementation can't be used. 
x_dimension_bin_count_ = parameters.getGPUBinCount( 1 ); y_dimension_bin_count_ = parameters.getGPUBinCount( 2 ); z_dimension_bin_count_ = parameters.getGPUBinCount( 3 ); -//#elif defined( SMILEI_OPENACC_MODE ) +//#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) // x_dimension_bin_count_ = 1; // y_dimension_bin_count_ = 1; // z_dimension_bin_count_ = 1; @@ -50,7 +50,7 @@ Projector3D2OrderGPU::~Projector3D2OrderGPU() // EMPTY } -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) extern "C" void currentDeposition3DOnDevice( double *__restrict__ Jx, double *__restrict__ Jy, @@ -122,6 +122,8 @@ namespace { // Unnamed namespace == static == internal linkage == no exported sy /// Project global current densities (EMfields->Jx_/Jy_/Jz_) /// /* inline */ void + +#if defined( SMILEI_ACCELERATOR_GPU ) currents( double *__restrict__ Jx, double *__restrict__ Jy, double *__restrict__ Jz, @@ -150,72 +152,77 @@ namespace { // Unnamed namespace == static == internal linkage == no exported sy double, int not_spectral ) { -#if defined( SMILEI_ACCELERATOR_MODE ) currentDeposition3DOnDevice( Jx, - Jy, - Jz, - Jx_size, - Jy_size, - Jz_size, - particles.getPtrPosition( 0 ), - particles.getPtrPosition( 1 ), - particles.getPtrPosition( 2 ), - particles.getPtrCharge(), - particles.getPtrWeight(), - particles.last_index.data(), - x_dimension_bin_count, - y_dimension_bin_count, - z_dimension_bin_count, - invgf_, - iold_, - deltaold_, - particles.deviceSize(), - inv_cell_volume, - dx_inv, - dy_inv, - dz_inv, - dx_ov_dt, - dy_ov_dt, - dz_ov_dt, - i_domain_begin, - j_domain_begin, - k_domain_begin, - nprimy, nprimz, - not_spectral ); + Jy, + Jz, + Jx_size, + Jy_size, + Jz_size, + particles.getPtrPosition( 0 ), + particles.getPtrPosition( 1 ), + particles.getPtrPosition( 2 ), + particles.getPtrCharge(), + particles.getPtrWeight(), + particles.last_index.data(), + x_dimension_bin_count, + y_dimension_bin_count, + z_dimension_bin_count, + invgf_, + iold_, + deltaold_, + particles.deviceSize(), + inv_cell_volume, + dx_inv, + dy_inv, + dz_inv, + dx_ov_dt, + dy_ov_dt, + dz_ov_dt, + i_domain_begin, + j_domain_begin, + k_domain_begin, + nprimy, nprimz, + not_spectral ); + } #else + currents( double *__restrict__ , double *__restrict__ , double *__restrict__ , int, int, int, + Particles &, unsigned int , unsigned int , unsigned int , const double *__restrict__ , + const int *__restrict__ , const double *__restrict__ , double , double , double , double , + double , double , double , int , int , int , int , int , double, int ) + { SMILEI_ASSERT( false ); -#endif } +#endif //! 
Project density /* inline */ void +#if defined( SMILEI_ACCELERATOR_GPU ) density( - double *__restrict__ rho, - int rho_size, - Particles &particles, - unsigned int x_dimension_bin_count, - unsigned int y_dimension_bin_count, - unsigned int z_dimension_bin_count, - const double *__restrict__ invgf_, - const int *__restrict__ iold_, - const double *__restrict__ deltaold_, - double inv_cell_volume, - double dx_inv, - double dy_inv, - double dz_inv, - double dx_ov_dt, - double dy_ov_dt, - double dz_ov_dt, - int i_domain_begin, - int j_domain_begin, - int k_domain_begin, - int nprimy, - int nprimz, - double, - int not_spectral ) + double *__restrict__ rho, + int rho_size, + Particles &particles, + unsigned int x_dimension_bin_count, + unsigned int y_dimension_bin_count, + unsigned int z_dimension_bin_count, + const double *__restrict__ invgf_, + const int *__restrict__ iold_, + const double *__restrict__ deltaold_, + double inv_cell_volume, + double dx_inv, + double dy_inv, + double dz_inv, + double dx_ov_dt, + double dy_ov_dt, + double dz_ov_dt, + int i_domain_begin, + int j_domain_begin, + int k_domain_begin, + int nprimy, + int nprimz, + double, + int not_spectral ) { -#if defined( SMILEI_ACCELERATOR_MODE ) densityDeposition3DOnDevice( rho, rho_size, @@ -244,10 +251,16 @@ namespace { // Unnamed namespace == static == internal linkage == no exported sy k_domain_begin, nprimy, nprimz, not_spectral ); + } #else + density( double *__restrict__ , int , Particles &, unsigned int , unsigned int , unsigned int , + const double *__restrict__ , const int *__restrict__ , const double *__restrict__ , + double , double , double , double , double , double , double , + int, int, int, int, int, double, int ) + { SMILEI_ASSERT( false ); -#endif } +#endif } // namespace @@ -255,7 +268,7 @@ void Projector3D2OrderGPU::basic( double *rhoj, Particles &particles, unsigned int ipart, unsigned int type, - int bin_shift ) + int /*bin_shift*/ ) { @@ -347,12 +360,12 @@ void Projector3D2OrderGPU::basic( double *rhoj, } } -void Projector3D2OrderGPU::ionizationCurrents( Field *Jx, - Field *Jy, - Field *Jz, - Particles &particles, - int ipart, - LocalFields Jion ) +void Projector3D2OrderGPU::ionizationCurrents( Field */*Jx*/, + Field */*Jy*/, + Field */*Jz*/, + Particles &/*particles*/, + int /*ipart*/, + LocalFields /*Jion */) { ERROR( "Projector3D2OrderGPU::ionizationCurrents(): Not implemented !" ); } @@ -366,8 +379,8 @@ void Projector3D2OrderGPU::currentsAndDensityWrapper( ElectroMagn *EMfields, bool diag_flag, bool is_spectral, int ispec, - int icell, - int ipart_ref ) + int /*icell*/, + int /*ipart_ref*/ ) { if( is_spectral ) { @@ -401,7 +414,7 @@ void Projector3D2OrderGPU::currentsAndDensityWrapper( ElectroMagn *EMfields, i_domain_begin_, j_domain_begin_, k_domain_begin_, nprimy, nprimz, one_third, - not_spectral ); + not_spectral_ ); double *const __restrict__ b_rho = EMfields->rho_s[ispec] ? EMfields->rho_s[ispec]->data() : EMfields->rho_->data(); unsigned int rho_size = EMfields->rho_s[ispec] ? 
EMfields->rho_s[ispec]->size() : EMfields->rho_->size(); @@ -416,7 +429,7 @@ void Projector3D2OrderGPU::currentsAndDensityWrapper( ElectroMagn *EMfields, i_domain_begin_, j_domain_begin_, k_domain_begin_, nprimy, nprimz, one_third, - not_spectral ); + not_spectral_ ); // If requested performs then the charge density deposition } else { @@ -440,7 +453,7 @@ void Projector3D2OrderGPU::currentsAndDensityWrapper( ElectroMagn *EMfields, i_domain_begin_, j_domain_begin_, k_domain_begin_, nprimy, nprimz, one_third, - not_spectral ); + not_spectral_ ); } // TODO(Etienne M): DIAGS. Find a way to get rho. We could: @@ -463,15 +476,15 @@ void Projector3D2OrderGPU::currentsAndDensityWrapper( ElectroMagn *EMfields, //std::cerr << sum << " " << sum2 << " " << sum_Jxs << " " << sum_Jx << std::endl; } -void Projector3D2OrderGPU::susceptibility( ElectroMagn *EMfields, - Particles &particles, - double species_mass, - SmileiMPI *smpi, - int istart, - int iend, - int ithread, - int icell, - int ipart_ref ) +void Projector3D2OrderGPU::susceptibility( ElectroMagn */*EMfields*/, + Particles &/*particles*/, + double /*species_mass*/, + SmileiMPI */*smpi*/, + int /*istart*/, + int /*iend*/, + int /*ithread*/, + int /*icell*/, + int /*ipart_ref */) { ERROR( "Projector3D2OrderGPU::susceptibility(): Not implemented !" ); } diff --git a/src/Projector/Projector3D2OrderGPU.cpp.backup b/src/Projector/Projector3D2OrderGPU.cpp.backup index 39ce7a4a5..761e6ae31 100755 --- a/src/Projector/Projector3D2OrderGPU.cpp.backup +++ b/src/Projector/Projector3D2OrderGPU.cpp.backup @@ -2,7 +2,7 @@ #include #include -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #include #include #endif @@ -136,7 +136,7 @@ Projector3D2OrderGPU::currentsAndDensityGPU( position_y /* [istart_pack:current_pack_size] */, \ position_z /* [istart_pack:current_pack_size] */ ) #pragma omp teams distribute parallel for -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present( iold [0:3 * nparts], \ deltaold [0:3 * nparts], \ Sx0 [0:kTmpArraySize], \ @@ -262,7 +262,7 @@ Projector3D2OrderGPU::currentsAndDensityGPU( #if defined( SMILEI_ACCELERATOR_GPU_OMP ) #pragma omp target #pragma omp teams distribute parallel for -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present( DSx [0:kTmpArraySize], sumX [0:kTmpArraySize] ) // #pragma acc parallel deviceptr( DSx, sumX ) @@ -287,7 +287,7 @@ Projector3D2OrderGPU::currentsAndDensityGPU( charge /* [istart_pack:current_pack_size] */, \ weight /* [istart_pack:current_pack_size] */ ) #pragma omp teams distribute parallel for -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present( iold [0:3 * nparts], \ Jx [0:Jx_size], \ Sy0 [0:kTmpArraySize], \ @@ -310,7 +310,7 @@ Projector3D2OrderGPU::currentsAndDensityGPU( const double crx_p = dx_ov_dt_inv_cell_volume * static_cast( charge[ipart] ) * weight[ipart]; const int linindex0 = iold[ipart+0*packsize]*yz_size0+iold[ipart+1*packsize]*z_size0+iold[ipart+2*packsize]; -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc loop vector #endif for( int k=0 ; k<5 ; k++ ) { @@ -326,7 +326,7 @@ Projector3D2OrderGPU::currentsAndDensityGPU( #if defined( SMILEI_ACCELERATOR_GPU_OMP ) #pragma omp atomic update -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc atomic #endif Jx [ jdx ] += val; @@ -339,7 +339,7 @@ 
Projector3D2OrderGPU::currentsAndDensityGPU( #if defined( SMILEI_ACCELERATOR_GPU_OMP ) #pragma omp target #pragma omp teams distribute parallel for -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present( DSy [0:kTmpArraySize], \ sumX [0:kTmpArraySize] ) @@ -365,7 +365,7 @@ Projector3D2OrderGPU::currentsAndDensityGPU( charge /* [istart_pack:current_pack_size] */, \ weight /* [istart_pack:current_pack_size] */ ) #pragma omp teams distribute parallel for -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present( iold [0:3 * nparts], \ Jy [0:Jy_size], \ Sx0 [0:kTmpArraySize], \ @@ -388,7 +388,7 @@ Projector3D2OrderGPU::currentsAndDensityGPU( const double cry_p = dy_ov_dt_inv_cell_volume * static_cast( charge[ipart] ) * weight[ipart]; const int linindex1 = iold[ipart+0*packsize]*yz_size1+iold[ipart+1*packsize]*z_size1+iold[ipart+2*packsize]; -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc loop vector #endif for( int k=0 ; k<5 ; k++ ) { @@ -404,7 +404,7 @@ Projector3D2OrderGPU::currentsAndDensityGPU( #if defined( SMILEI_ACCELERATOR_GPU_OMP ) #pragma omp atomic update -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc atomic #endif Jy [ jdx ] += val; @@ -417,7 +417,7 @@ Projector3D2OrderGPU::currentsAndDensityGPU( #if defined( SMILEI_ACCELERATOR_GPU_OMP ) #pragma omp target #pragma omp teams distribute parallel for -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present( DSz [0:kTmpArraySize], \ sumX [0:kTmpArraySize] ) @@ -443,7 +443,7 @@ Projector3D2OrderGPU::currentsAndDensityGPU( charge /* [istart_pack:current_pack_size] */, \ weight /* [istart_pack:current_pack_size] */ ) #pragma omp teams distribute parallel for -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present( iold [0:3 * nparts], \ Jz [0:Jz_size], \ Sx0 [0:kTmpArraySize], \ @@ -466,7 +466,7 @@ Projector3D2OrderGPU::currentsAndDensityGPU( const double crz_p = dz_ov_dt_inv_cell_volume * static_cast( charge[ipart] ) * weight[ipart]; const int linindex2 = iold[ipart+0*packsize]*yz_size2+iold[ipart+1*packsize]*z_size2+iold[ipart+2*packsize]; -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc loop vector #endif for( int k=1 ; k<5 ; k++ ) { @@ -482,7 +482,7 @@ Projector3D2OrderGPU::currentsAndDensityGPU( #if defined( SMILEI_ACCELERATOR_GPU_OMP ) #pragma omp atomic update -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc atomic #endif Jz[ jdx ] += val; @@ -498,7 +498,7 @@ Projector3D2OrderGPU::currentsAndDensityGPU( charge /* [istart_pack:current_pack_size] */, \ weight /* [istart_pack:current_pack_size] */ ) #pragma omp teams distribute parallel for -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present( iold [0:3 * nparts], \ rho [0:rho_size], \ Sx1 [0:kTmpArraySize], \ @@ -523,7 +523,7 @@ Projector3D2OrderGPU::currentsAndDensityGPU( int jdx = idx + k; #if defined( SMILEI_ACCELERATOR_GPU_OMP ) #pragma omp atomic update -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc atomic #endif rho[ jdx ] += charge_weight * Sx1[ipart_pack+i*packsize]*Sy1[ipart_pack+j*packsize]*Sz1[ipart_pack+k*packsize]; diff --git a/src/Projector/Projector3D2OrderGPU.h 
b/src/Projector/Projector3D2OrderGPU.h index 2fac2402e..c8ebd0ae8 100755 --- a/src/Projector/Projector3D2OrderGPU.h +++ b/src/Projector/Projector3D2OrderGPU.h @@ -46,21 +46,21 @@ class Projector3D2OrderGPU : public Projector3D int ipart_ref = 0 ) override; //!Wrapper for task-based implementation of Smilei - void currentsAndDensityWrapperOnBuffers( double *b_Jx, - double *b_Jy, - double *b_Jz, - double *b_rho, - int bin_width, - Particles &particles, - SmileiMPI *smpi, - int istart, - int iend, - int ithread, - bool diag_flag, - bool is_spectral, - int ispec, - int icell = 0, - int ipart_ref = 0 ) override {}; + void currentsAndDensityWrapperOnBuffers( double * /*b_Jx*/, + double * /*b_Jy*/, + double * /*b_Jz*/, + double * /*b_rho*/, + int /*bin_width*/, + Particles &/*particles*/, + SmileiMPI */*smpi*/, + int /*istart*/, + int /*iend*/, + int /*ithread*/, + bool /*diag_flag*/, + bool /*is_spectral*/, + int /*ispec*/, + int /*icell*/ = 0, + int /*ipart_ref*/ = 0 ) override {}; /// Project susceptibility, used as source term in envelope equation /// @@ -78,7 +78,7 @@ class Projector3D2OrderGPU : public Projector3D double dt; double dts2; double dts4; - int not_spectral; + int not_spectral_; unsigned int x_dimension_bin_count_; unsigned int y_dimension_bin_count_; unsigned int z_dimension_bin_count_; diff --git a/src/Projector/Projector3D2OrderGPUKernel.cpp b/src/Projector/Projector3D2OrderGPUKernel.cpp index f77a4fda3..f9465dc2a 100644 --- a/src/Projector/Projector3D2OrderGPUKernel.cpp +++ b/src/Projector/Projector3D2OrderGPUKernel.cpp @@ -5,7 +5,7 @@ // issues (!). -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) //! Simple switch to jump between the reference (omp) implementation and the //! hip one. @@ -71,7 +71,6 @@ currentDeposition3DOnDevice( double *__restrict__ host_Jx, int nprimz, int not_spectral ) { - // printf("We are doing current deposition on GPU \n"); //#if defined( PRIVATE_SMILEI_USE_OPENMP_PROJECTION_IMPLEMENTATION ) //acc:: // OpenMP or OpenACC version serves as a reference along with the CPU version //#else @@ -134,8 +133,6 @@ densityDeposition3DOnDevice( int nprimz, int not_spectral ) { - //printf("We are doing a densitydeposition on GPU \n"); - //#if defined( PRIVATE_SMILEI_USE_OPENMP_PROJECTION_IMPLEMENTATION ) //acc:: // OpenMP or OpenACC version serves as a reference along with the CPU version //#else diff --git a/src/Projector/Projector3D2OrderGPUKernelAcc.h b/src/Projector/Projector3D2OrderGPUKernelAcc.h index 9cf3b224d..43bff1cce 100644 --- a/src/Projector/Projector3D2OrderGPUKernelAcc.h +++ b/src/Projector/Projector3D2OrderGPUKernelAcc.h @@ -1,6 +1,6 @@ //! 
Optimized Acc projection (from Julien Derouillat) -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) #include #include "Tools.h" @@ -110,7 +110,7 @@ namespace acc { position_y /* [istart_pack:current_pack_size] */, \ position_z /* [istart_pack:current_pack_size] */ ) #pragma omp teams distribute parallel for -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present( iold [0:3 * nparts], \ deltaold [0:3 * nparts], \ Sx0 [0:kTmpArraySize], \ @@ -236,7 +236,7 @@ namespace acc { #if defined( SMILEI_ACCELERATOR_GPU_OMP ) #pragma omp target #pragma omp teams distribute parallel for -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present( DSx [0:kTmpArraySize], sumX [0:kTmpArraySize] ) // #pragma acc parallel deviceptr( DSx, sumX ) @@ -261,7 +261,7 @@ namespace acc { charge /* [istart_pack:current_pack_size] */, \ weight /* [istart_pack:current_pack_size] */ ) #pragma omp teams distribute parallel for -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present( iold [0:3 * nparts], \ Jx [0:Jx_size], \ Sy0 [0:kTmpArraySize], \ @@ -284,7 +284,7 @@ namespace acc { const double crx_p = dx_ov_dt_inv_cell_volume * static_cast( charge[ipart] ) * weight[ipart]; const int linindex0 = iold[ipart+0*packsize]*yz_size0+iold[ipart+1*packsize]*z_size0+iold[ipart+2*packsize]; -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc loop vector #endif for( int k=0 ; k<5 ; k++ ) { @@ -309,7 +309,7 @@ namespace acc { #if defined( SMILEI_ACCELERATOR_GPU_OMP ) #pragma omp target #pragma omp teams distribute parallel for -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present( DSy [0:kTmpArraySize], \ sumX [0:kTmpArraySize] ) @@ -335,7 +335,7 @@ namespace acc { charge /* [istart_pack:current_pack_size] */, \ weight /* [istart_pack:current_pack_size] */ ) #pragma omp teams distribute parallel for -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present( iold [0:3 * nparts], \ Jy [0:Jy_size], \ Sx0 [0:kTmpArraySize], \ @@ -358,7 +358,7 @@ namespace acc { const double cry_p = dy_ov_dt_inv_cell_volume * static_cast( charge[ipart] ) * weight[ipart]; const int linindex1 = iold[ipart+0*packsize]*yz_size1+iold[ipart+1*packsize]*z_size1+iold[ipart+2*packsize]; -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc loop vector #endif for( int k=0 ; k<5 ; k++ ) { @@ -383,7 +383,7 @@ namespace acc { #if defined( SMILEI_ACCELERATOR_GPU_OMP ) #pragma omp target #pragma omp teams distribute parallel for -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present( DSz [0:kTmpArraySize], \ sumX [0:kTmpArraySize] ) @@ -409,7 +409,7 @@ namespace acc { charge /* [istart_pack:current_pack_size] */, \ weight /* [istart_pack:current_pack_size] */ ) #pragma omp teams distribute parallel for -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present( iold [0:3 * nparts], \ Jz [0:Jz_size], \ Sx0 [0:kTmpArraySize], \ @@ -432,7 +432,7 @@ namespace acc { const double crz_p = dz_ov_dt_inv_cell_volume * static_cast( charge[ipart] ) * weight[ipart]; const int linindex2 = iold[ipart+0*packsize]*yz_size2+iold[ipart+1*packsize]*z_size2+iold[ipart+2*packsize]; -#ifdef SMILEI_OPENACC_MODE +#ifdef 
SMILEI_ACCELERATOR_GPU_OACC #pragma acc loop vector #endif for( int k=1 ; k<5 ; k++ ) { @@ -536,7 +536,7 @@ namespace acc { position_y /* [istart_pack:current_pack_size] */, \ position_z /* [istart_pack:current_pack_size] */ ) #pragma omp teams distribute parallel for -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present( iold [0:3 * nparts], \ deltaold [0:3 * nparts], \ Sx1 [0:kTmpArraySize], \ @@ -630,7 +630,7 @@ namespace acc { charge /* [istart_pack:current_pack_size] */, \ weight /* [istart_pack:current_pack_size] */ ) #pragma omp teams distribute parallel for -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present( iold [0:3 * nparts], \ rho [0:rho_size], \ Sx1 [0:kTmpArraySize], \ diff --git a/src/Projector/Projector3D2OrderGPUKernelCUDAHIP.cu b/src/Projector/Projector3D2OrderGPUKernelCUDAHIP.cu old mode 100644 new mode 100755 index 195a02667..0883bdafd --- a/src/Projector/Projector3D2OrderGPUKernelCUDAHIP.cu +++ b/src/Projector/Projector3D2OrderGPUKernelCUDAHIP.cu @@ -1,6 +1,6 @@ //! HIP CUDA implementation -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) //#include "Projector3D2OrderGPUKernelCUDAHIP.h" @@ -162,7 +162,7 @@ namespace cudahip { int k_domain_begin, int nprimy, int nprimz, - int not_spectral ) + int not_spectral_ ) { // Potential future work for optimization: Break the kernel into smaller // pieces (lds init/store, coeff computation, deposition etc..) @@ -238,9 +238,6 @@ namespace cudahip { device_bin_index[workgroup_dedicated_bin_index - 1]; const unsigned int last_particle = device_bin_index[workgroup_dedicated_bin_index]; -//std::cout << first_particle << std::endl; -//printf("%d \n",first_particle); - for( unsigned int particle_index = first_particle + thread_index_offset; particle_index < last_particle; particle_index += loop_stride ) { @@ -501,8 +498,8 @@ namespace cudahip { // These atomics are basically free (very few of them). atomic::GDS::AddNoReturn( &device_Jx[global_memory_index], static_cast( Jx_scratch_space[field_index] ) ); - atomic::GDS::AddNoReturn( &device_Jy[global_memory_index + /* We handle the FTDT/picsar */ not_spectral * global_x_scratch_space_coordinate * nprimz], static_cast( Jy_scratch_space[field_index] ) ); - atomic::GDS::AddNoReturn( &device_Jz[global_memory_index + /* We handle the FTDT/picsar */ not_spectral * (global_x_scratch_space_coordinate * nprimy + global_y_scratch_space_coordinate)], static_cast( Jz_scratch_space[field_index] ) ); + atomic::GDS::AddNoReturn( &device_Jy[global_memory_index + /* We handle the FTDT/picsar */ not_spectral_ * global_x_scratch_space_coordinate * nprimz], static_cast( Jy_scratch_space[field_index] ) ); + atomic::GDS::AddNoReturn( &device_Jz[global_memory_index + /* We handle the FTDT/picsar */ not_spectral_ * (global_x_scratch_space_coordinate * nprimy + global_y_scratch_space_coordinate)], static_cast( Jz_scratch_space[field_index] ) ); } } // end DepositCurrent @@ -536,7 +533,7 @@ namespace cudahip { int k_domain_begin, int nprimy, int nprimz, - int not_spectral ) + int not_spectral_ ) { // TODO(Etienne M): refactor this function. Break it into smaller // pieces (lds init/store, coeff computation, deposition etc..) 
@@ -716,7 +713,7 @@ namespace cudahip { int k_domain_begin, int nprimy, int nprimz, - int not_spectral ) + int not_spectral_ ) { SMILEI_ASSERT( Params::getGPUClusterWidth( 3 /* 2D */ ) != -1 && Params::getGPUClusterGhostCellBorderWidth( 2 /* 2nd order interpolation */ ) != -1 ); @@ -767,7 +764,7 @@ namespace cudahip { dx_ov_dt, dy_ov_dt, dz_ov_dt, i_domain_begin, j_domain_begin, k_domain_begin, nprimy, nprimz, - not_spectral + not_spectral_ ); checkHIPErrors( ::hipDeviceSynchronize() ); @@ -799,7 +796,7 @@ namespace cudahip { dx_ov_dt, dy_ov_dt, dz_ov_dt, i_domain_begin, j_domain_begin, k_domain_begin, nprimy, nprimz, - not_spectral + not_spectral_ ); checkHIPErrors( ::cudaDeviceSynchronize() ); #endif @@ -836,7 +833,7 @@ namespace cudahip { int k_domain_begin, int nprimy, int nprimz, - int not_spectral ) + int not_spectral_ ) { SMILEI_ASSERT( Params::getGPUClusterWidth( 3 /* 2D */ ) != -1 && Params::getGPUClusterGhostCellBorderWidth( 2 /* 2nd order interpolation */ ) != -1 ); @@ -886,7 +883,7 @@ namespace cudahip { dx_ov_dt, dy_ov_dt, dz_ov_dt, i_domain_begin, j_domain_begin, k_domain_begin, nprimy, nprimz, - not_spectral ); + not_spectral_ ); checkHIPErrors( ::hipDeviceSynchronize() ); #elif defined ( __NVCC__ ) @@ -914,7 +911,7 @@ namespace cudahip { dx_ov_dt, dy_ov_dt, dz_ov_dt, i_domain_begin, j_domain_begin, k_domain_begin, nprimy, nprimz, - not_spectral + not_spectral_ ); checkHIPErrors( ::cudaDeviceSynchronize() ); #endif diff --git a/src/Projector/Projector3D2OrderGPUKernelCUDAHIP.h b/src/Projector/Projector3D2OrderGPUKernelCUDAHIP.h old mode 100644 new mode 100755 index 94368f4dd..cbd9729c3 --- a/src/Projector/Projector3D2OrderGPUKernelCUDAHIP.h +++ b/src/Projector/Projector3D2OrderGPUKernelCUDAHIP.h @@ -4,7 +4,7 @@ #define Projector3D2OrderGPUKernelCUDAHIP_H -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) #if defined( __HIP__ ) #include @@ -17,9 +17,8 @@ #include "gpu.h" namespace cudahip { -//static -inline void - currentDepositionKernel3D( double *__restrict__ host_Jx, +//static inline +void currentDepositionKernel3D( double *__restrict__ host_Jx, double *__restrict__ host_Jy, double *__restrict__ host_Jz, int Jx_size, @@ -50,11 +49,10 @@ inline void int k_domain_begin, int nprimy, int nprimz, - int not_spectral ); + int not_spectral_ ); -//static -inline void - densityDepositionKernel3D( +//static inline +void densityDepositionKernel3D( double *__restrict__ host_rho, int rho_size, const double *__restrict__ device_particle_position_x, @@ -82,7 +80,7 @@ inline void int k_domain_begin, int nprimy, int nprimz, - int not_spectral ); + int not_spectral_ ); } // namespace cudahip diff --git a/src/Projector/Projector3D2OrderGPUKernelNaive.h b/src/Projector/Projector3D2OrderGPUKernelNaive.h index b6cfac080..a261af40b 100644 --- a/src/Projector/Projector3D2OrderGPUKernelNaive.h +++ b/src/Projector/Projector3D2OrderGPUKernelNaive.h @@ -1,6 +1,6 @@ //! 
Naive ACC/OMP implementation -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) #include #include "Tools.h" @@ -66,7 +66,7 @@ namespace acc { position_y /* [istart_pack:current_pack_size] */, \ position_z /* [istart_pack:current_pack_size] */ ) #pragma omp teams distribute parallel for -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present( iold [0:3 * nparts], \ deltaold [0:3 * nparts], \ Jx[0:Jx_size], \ @@ -344,7 +344,7 @@ namespace acc { position_y /* [istart_pack:current_pack_size] */, \ position_z /* [istart_pack:current_pack_size] */ ) #pragma omp teams distribute parallel for -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present( iold [0:3 * nparts], \ deltaold [0:3 * nparts], \ rho[0:rho_size] \ diff --git a/src/Projector/ProjectorAM2OrderV.cpp b/src/Projector/ProjectorAM2OrderV.cpp index b222aa4ee..890d37332 100755 --- a/src/Projector/ProjectorAM2OrderV.cpp +++ b/src/Projector/ProjectorAM2OrderV.cpp @@ -673,10 +673,6 @@ void ProjectorAM2OrderV::susceptibility( ElectroMagn *EMfields, Particles &parti double charge_weight[8] __attribute__( ( aligned( 64 ) ) ); // double r_bar[8] __attribute__( ( aligned( 64 ) ) ); - //double *invR_local = &(invR_[jpom2]); - // double *invRd_local = &(invRd_[jpom2]); - - double *invR_local = &(invR_[jpom2]); // Pointer for GPU and vectorization on ARM processors double * __restrict__ position_x = particles.getPtrPosition(0); double * __restrict__ position_y = particles.getPtrPosition(1); diff --git a/src/Projector/ProjectorFactory.h b/src/Projector/ProjectorFactory.h index db8c39e1f..5b1f50e37 100755 --- a/src/Projector/ProjectorFactory.h +++ b/src/Projector/ProjectorFactory.h @@ -3,6 +3,7 @@ #include "Projector.h" #include "Projector1D2Order.h" +#include "Projector1D2OrderGPU.h" #include "Projector1D4Order.h" #include "Projector2D2Order.h" #include "Projector2D2OrderGPU.h" @@ -33,7 +34,11 @@ class ProjectorFactory // 1Dcartesian simulation // --------------- if( ( params.geometry == "1Dcartesian" ) && ( params.interpolation_order == ( unsigned int )2 ) ) { - Proj = new Projector1D2Order( params, patch ); + #if defined( SMILEI_ACCELERATOR_GPU ) + Proj = new Projector1D2OrderGPU( params, patch ); + #else + Proj = new Projector1D2Order( params, patch ); + #endif } else if( ( params.geometry == "1Dcartesian" ) && ( params.interpolation_order == ( unsigned int )4 ) ) { Proj = new Projector1D4Order( params, patch ); } @@ -42,7 +47,7 @@ class ProjectorFactory // --------------- else if( ( params.geometry == "2Dcartesian" ) && ( params.interpolation_order == ( unsigned int )2 ) ) { if( !vectorization ) { - #if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined( SMILEI_OPENACC_MODE ) + #if defined( SMILEI_ACCELERATOR_GPU ) Proj = new Projector2D2OrderGPU( params, patch ); #else Proj = new Projector2D2Order( params, patch ); @@ -64,7 +69,7 @@ class ProjectorFactory // --------------- else if( ( params.geometry == "3Dcartesian" ) && ( params.interpolation_order == ( unsigned int )2 ) ) { if( !vectorization ) { - #if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined( SMILEI_OPENACC_MODE ) + #if defined( SMILEI_ACCELERATOR_GPU ) Proj = new Projector3D2OrderGPU( params, patch ); #else Proj = new Projector3D2Order( params, patch ); diff --git a/src/Pusher/PusherBoris.cpp b/src/Pusher/PusherBoris.cpp index 536def7a9..8f70a6cc3 100755 --- a/src/Pusher/PusherBoris.cpp +++ b/src/Pusher/PusherBoris.cpp @@ -57,7 +57,7 @@ void 
PusherBoris::operator()( Particles &particles, SmileiMPI *smpi, int istart, position_y /* [istart:particle_number] */, \ position_z /* [istart:particle_number] */ ) #pragma omp teams distribute parallel for -#elif defined(SMILEI_OPENACC_MODE) +#elif defined(SMILEI_ACCELERATOR_GPU_OACC) const int istart_offset = istart - ipart_buffer_offset; const int particle_number = iend - istart; diff --git a/src/Pusher/PusherBorisNR.cpp b/src/Pusher/PusherBorisNR.cpp index 84f072e1f..df4a3277b 100755 --- a/src/Pusher/PusherBorisNR.cpp +++ b/src/Pusher/PusherBorisNR.cpp @@ -57,7 +57,7 @@ void PusherBorisNR::operator()( Particles &particles, SmileiMPI *smpi, int istar position_y /* [istart:particle_number] */, \ position_z /* [istart:particle_number] */ ) #pragma omp teams distribute parallel for -#elif defined(SMILEI_OPENACC_MODE) +#elif defined(SMILEI_ACCELERATOR_GPU_OACC) const int istart_offset = istart - ipart_buffer_offset; const int particle_number = iend - istart; diff --git a/src/Pusher/PusherHigueraCary.cpp b/src/Pusher/PusherHigueraCary.cpp index 2ab234ae1..c85189fff 100755 --- a/src/Pusher/PusherHigueraCary.cpp +++ b/src/Pusher/PusherHigueraCary.cpp @@ -68,7 +68,7 @@ void PusherHigueraCary::operator()( Particles &particles, SmileiMPI *smpi, int i position_y /* [istart:particle_number] */, \ position_z /* [istart:particle_number] */ ) #pragma omp teams distribute parallel for -#elif defined(SMILEI_OPENACC_MODE) +#elif defined(SMILEI_ACCELERATOR_GPU_OACC) const int istart_offset = istart - ipart_buffer_offset; const int particle_number = iend - istart; diff --git a/src/Pusher/PusherPhoton.cpp b/src/Pusher/PusherPhoton.cpp index a94a521e3..5feb7823d 100755 --- a/src/Pusher/PusherPhoton.cpp +++ b/src/Pusher/PusherPhoton.cpp @@ -53,7 +53,7 @@ void PusherPhoton::operator()( Particles &particles, SmileiMPI *smpi, position_y /* [istart:particle_number] */, \ position_z /* [istart:particle_number] */ ) #pragma omp teams distribute parallel for -#elif defined(SMILEI_OPENACC_MODE) +#elif defined(SMILEI_ACCELERATOR_GPU_OACC) const int istart_offset = istart - ipart_ref; const int particle_number = iend - istart; diff --git a/src/Pusher/PusherPonderomotiveBoris.cpp b/src/Pusher/PusherPonderomotiveBoris.cpp index 41afa42e6..9d151dabb 100755 --- a/src/Pusher/PusherPonderomotiveBoris.cpp +++ b/src/Pusher/PusherPonderomotiveBoris.cpp @@ -55,7 +55,7 @@ void PusherPonderomotiveBoris::operator()( Particles &particles, SmileiMPI *smpi const double *const __restrict__ GradPhiz = &( ( *GradPhipart )[2*nparts] ); //double *inv_gamma_ponderomotive = &( ( *dynamics_inv_gamma_ponderomotive )[0*nparts] ); - #ifndef SMILEI_OPENACC_MODE + #ifndef SMILEI_ACCELERATOR_GPU_OACC #pragma omp simd #else int np = iend-istart; diff --git a/src/Pusher/PusherPonderomotiveBorisBTIS3.cpp b/src/Pusher/PusherPonderomotiveBorisBTIS3.cpp index 379f41763..a32f359cb 100644 --- a/src/Pusher/PusherPonderomotiveBorisBTIS3.cpp +++ b/src/Pusher/PusherPonderomotiveBorisBTIS3.cpp @@ -31,7 +31,6 @@ void PusherPonderomotiveBorisBTIS3::operator()( Particles &particles, SmileiMPI double charge_over_mass_dts2, charge_sq_over_mass_sq_dts4; double umx, umy, umz, upx, upy, upz; double alpha; - double TxTy, TyTz, TzTx; double pxsm, pysm, pzsm; //double one_ov_gamma_ponderomotive; diff --git a/src/Pusher/PusherPonderomotivePositionBoris.cpp b/src/Pusher/PusherPonderomotivePositionBoris.cpp index 16a4e6c69..9b9bea639 100755 --- a/src/Pusher/PusherPonderomotivePositionBoris.cpp +++ b/src/Pusher/PusherPonderomotivePositionBoris.cpp @@ -52,7 +52,7 @@ void 
PusherPonderomotivePositionBoris::operator()( Particles &particles, SmileiM const double *const __restrict__ GradPhi_my = &( ( *GradPhi_mpart )[1*nparts] ); const double *const __restrict__ GradPhi_mz = &( ( *GradPhi_mpart )[2*nparts] ); - #ifndef SMILEI_OPENACC_MODE + #ifndef SMILEI_ACCELERATOR_GPU_OACC #pragma omp simd #else int np = iend-istart; diff --git a/src/Pusher/PusherVay.cpp b/src/Pusher/PusherVay.cpp index c1ba76693..83debaae4 100755 --- a/src/Pusher/PusherVay.cpp +++ b/src/Pusher/PusherVay.cpp @@ -67,7 +67,7 @@ void PusherVay::operator()( Particles &particles, SmileiMPI *smpi, int istart, i position_y /* [istart:particle_number] */, \ position_z /* [istart:particle_number] */ ) #pragma omp teams distribute parallel for -#elif defined(SMILEI_OPENACC_MODE) +#elif defined(SMILEI_ACCELERATOR_GPU_OACC) const int istart_offset = istart - ipart_buffer_offset; const int particle_number = iend - istart; diff --git a/src/Python/pyinit.py b/src/Python/pyinit.py index 56febc475..f5aeeb7e1 100755 --- a/src/Python/pyinit.py +++ b/src/Python/pyinit.py @@ -645,7 +645,8 @@ class MultiphotonBreitWheeler(SmileiComponent): # Smilei-defined smilei_mpi_rank = 0 smilei_mpi_size = 1 -smilei_rand_max = 2**31-1 +smilei_omp_threads = 1 +smilei_total_cores = 1 # Variable to set to False for the actual run (useful for the test mode) _test_mode = True diff --git a/src/Python/pyprofiles.py b/src/Python/pyprofiles.py index 0e122a1a9..2fff14c1f 100755 --- a/src/Python/pyprofiles.py +++ b/src/Python/pyprofiles.py @@ -702,7 +702,7 @@ def LaserGaussianAM( box_side="xmin", a0=1., omega=1., focus=None, waist=3., print("ERROR: focus should be a list of length 1") exit(1) elif (len(focus)==2): - print("WARNING: deprecated focus in LaserEnvelopeGaussianAM should be a list of length 1") + print("WARNING: deprecated focus in LaserGaussianAM should be a list of length 1") # Polarization and amplitude [dephasing, amplitudeY, amplitudeZ] = transformPolarization(polarization_phi, ellipticity) amplitudeY *= a0 * omega diff --git a/src/Radiation/RadiationCorrLandauLifshitz.cpp b/src/Radiation/RadiationCorrLandauLifshitz.cpp index 16c7b01fe..ebb0e54dd 100755 --- a/src/Radiation/RadiationCorrLandauLifshitz.cpp +++ b/src/Radiation/RadiationCorrLandauLifshitz.cpp @@ -96,7 +96,7 @@ void RadiationCorrLandauLifshitz::operator()( // cumulative Radiated energy from istart to iend double radiated_energy_loc = 0; -#ifndef SMILEI_OPENACC_MODE +#ifndef SMILEI_ACCELERATOR_GPU_OACC // Local vector to store the radiated energy double * rad_norm_energy = new double [iend-istart]; // double * rad_norm_energy = (double*) aligned_alloc(64, (iend-istart)*sizeof(double)); @@ -112,7 +112,7 @@ void RadiationCorrLandauLifshitz::operator()( // Computation // NVIDIA GPUs - #if defined (SMILEI_OPENACC_MODE) + #if defined (SMILEI_ACCELERATOR_GPU_OACC) const int istart_offset = istart - ipart_ref; const int np = iend-istart; #pragma acc parallel \ @@ -185,7 +185,7 @@ void RadiationCorrLandauLifshitz::operator()( // _______________________________________________________________ // Computation of the thread radiated energy -#ifndef SMILEI_OPENACC_MODE +#ifndef SMILEI_ACCELERATOR_GPU_OACC // Exact energy loss due to the radiation rad_norm_energy[ipart-istart] = gamma - std::sqrt( 1.0 @@ -210,7 +210,7 @@ void RadiationCorrLandauLifshitz::operator()( // _______________________________________________________________ // Update of the quantum parameter -#ifndef SMILEI_OPENACC_MODE +#ifndef SMILEI_ACCELERATOR_GPU_OACC #pragma omp simd for( int ipart=istart ; 
ipart #include -#if defined(SMILEI_OPENACC_MODE) +#if defined(SMILEI_ACCELERATOR_GPU_OACC) #define __HIP_PLATFORM_NVCC__ #define __HIP_PLATFORM_NVIDIA__ #include "gpuRandom.h" @@ -103,7 +103,7 @@ void RadiationMonteCarlo::operator()( // Temporary double parameter double temp; -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC unsigned long long seed; // Parameters for CUDA generator unsigned long long seq; unsigned long long offset; @@ -152,7 +152,7 @@ void RadiationMonteCarlo::operator()( // Number of photons int nphotons; -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC int nphotons_start; #endif @@ -160,7 +160,7 @@ void RadiationMonteCarlo::operator()( const double photon_buffer_size_per_particle = radiation_photon_sampling_ * max_photon_emissions_; if (photons) { -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC // We reserve a large number of potential photons on device since we can't reallocate nphotons_start = photons->deviceSize(); //static_cast(photons)->deviceReserve( nphotons + (iend - istart) * photon_buffer_size_per_particle ); @@ -199,13 +199,13 @@ void RadiationMonteCarlo::operator()( double *const __restrict__ photon_tau = photons ? (photons->has_Monte_Carlo_process ? photons->getPtrTau() : nullptr) : nullptr; -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC // Cell keys as a mask int *const __restrict__ photon_cell_keys = photons ? photons->getPtrCellKeys() : nullptr; #endif // Table properties ---------------------------------------------------------------- -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC // Size of tables // int size_of_Table_integfochi = RadiationTables.integfochi_.size_particle_chi_; // int size_of_Table_min_photon_chi = RadiationTables.xi_.size_particle_chi_; @@ -221,7 +221,7 @@ void RadiationMonteCarlo::operator()( // _______________________________________________________________ // Computation -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC // Management of the data on GPU though this data region int np = iend-istart; @@ -342,7 +342,7 @@ void RadiationMonteCarlo::operator()( // New final optical depth to reach for emision while( tau[ipart] <= epsilon_tau_ ) { //tau[ipart] = -log( 1.-Rand::uniform() ); - #ifndef SMILEI_OPENACC_MODE + #ifndef SMILEI_ACCELERATOR_GPU_OACC tau[ipart] = -std::log( 1.-rand_->uniform() ); #else seed_curand_1 = (int) (ipart+1)*(initial_seed_1+1); //Seed for linear generator @@ -385,7 +385,7 @@ void RadiationMonteCarlo::operator()( // Draw random number in [0,1[ - #ifndef SMILEI_OPENACC_MODE + #ifndef SMILEI_ACCELERATOR_GPU_OACC random_number = rand_->uniform(); #else seed_curand_2 = (int) (ipart + 1)*(initial_seed_2 + 1); //Seed for linear generator @@ -433,7 +433,7 @@ void RadiationMonteCarlo::operator()( && ( i_photon_emission < max_photon_emissions_)) { // CPU implementation (non-threaded implementation) -#ifndef SMILEI_OPENACC_MODE +#ifndef SMILEI_ACCELERATOR_GPU_OACC // Creation of new photons in the temporary array photons photons->createParticles( radiation_photon_sampling_ ); @@ -611,14 +611,14 @@ void RadiationMonteCarlo::operator()( } // end while } // end for -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC } // end acc parallel #endif //if (photons) std::cerr << photons->deviceSize() << std::endl; // Remove extra space to save memory -#ifndef SMILEI_OPENACC_MODE +#ifndef SMILEI_ACCELERATOR_GPU_OACC if (photons) { photons->shrinkToFit( true ); } @@ -631,7 +631,7 @@ void RadiationMonteCarlo::operator()( // 
____________________________________________________ // Update of the quantum parameter chi -#ifndef SMILEI_OPENACC_MODE +#ifndef SMILEI_ACCELERATOR_GPU_OACC #pragma omp simd #else int np = iend-istart; @@ -660,11 +660,11 @@ void RadiationMonteCarlo::operator()( } - #ifdef SMILEI_OPENACC_MODE + #ifdef SMILEI_ACCELERATOR_GPU_OACC } // end acc parallel #endif -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC } // end acc data #endif diff --git a/src/Radiation/RadiationMonteCarlo.h b/src/Radiation/RadiationMonteCarlo.h index 34b8c31db..4e84f169d 100755 --- a/src/Radiation/RadiationMonteCarlo.h +++ b/src/Radiation/RadiationMonteCarlo.h @@ -16,7 +16,7 @@ #include "Radiation.h" #include "userFunctions.h" -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #include // This is wrong. Dont include nvidiaParticles, it may cause problem! // See particle factory. diff --git a/src/Radiation/RadiationNiel.cpp b/src/Radiation/RadiationNiel.cpp index 6e61f3759..dff292df4 100755 --- a/src/Radiation/RadiationNiel.cpp +++ b/src/Radiation/RadiationNiel.cpp @@ -127,7 +127,7 @@ void RadiationNiel::operator()( double radiated_energy_loc = 0; // Parameters for linear alleatory number generator - #ifdef SMILEI_OPENACC_MODE + #ifdef SMILEI_ACCELERATOR_GPU_OACC // Initialize initial seed for linear generator double initial_seed = rand_->uniform(); @@ -144,7 +144,7 @@ void RadiationNiel::operator()( //double t0 = MPI_Wtime(); // 1) Vectorized computation of gamma and the particle quantum parameter -#ifndef SMILEI_OPENACC_MODE +#ifndef SMILEI_ACCELERATOR_GPU_OACC #pragma omp simd #else @@ -190,12 +190,12 @@ void RadiationNiel::operator()( Ex[ipart-ipart_ref], Ey[ipart-ipart_ref], Ez[ipart-ipart_ref], Bx[ipart-ipart_ref], By[ipart-ipart_ref], Bz[ipart-ipart_ref] ); -#ifndef SMILEI_OPENACC_MODE +#ifndef SMILEI_ACCELERATOR_GPU_OACC } //finish cycle #endif //double t1 = MPI_Wtime(); - #ifdef SMILEI_OPENACC_MODE + #ifdef SMILEI_ACCELERATOR_GPU_OACC if( particle_chi[ipart] > minimum_chi_continuous ) { seed_curand = (int) (ipart+1)*(initial_seed+1); //Seed for linear generator @@ -297,7 +297,7 @@ void RadiationNiel::operator()( if( niel_computation_method == 0 ) { - #ifndef SMILEI_OPENACC_MODE + #ifndef SMILEI_ACCELERATOR_GPU_OACC for( ipart=istart ; ipart minimum_chi_continuous ) { @@ -310,7 +310,7 @@ void RadiationNiel::operator()( diffusion[ipart-istart] = std::sqrt( factor_classical_radiated_power*gamma[ipart-ipart_ref]*temp )*random_numbers[ipart-istart]; - #ifndef SMILEI_OPENACC_MODE + #ifndef SMILEI_ACCELERATOR_GPU_OACC } } #endif @@ -318,7 +318,7 @@ void RadiationNiel::operator()( // Using the fit at order 5 (vectorized) else if( niel_computation_method == 1 ) { - #ifndef SMILEI_OPENACC_MODE + #ifndef SMILEI_ACCELERATOR_GPU_OACC #pragma omp simd private(temp) for( ipart=istart ; ipart #endif diff --git a/src/Radiation/RadiationTables.h b/src/Radiation/RadiationTables.h index bc5003966..77bcac8e2 100755 --- a/src/Radiation/RadiationTables.h +++ b/src/Radiation/RadiationTables.h @@ -58,7 +58,7 @@ class RadiationTables //! param[in] particle_chi particle quantum parameter //! param[in] particle_gamma particle Lorentz factor //! param[in] integfochi_table table of the discretized integrated f/chi function for Photon production yield computation -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc routine seq #endif double computePhotonProductionYield( const double particle_chi, @@ -77,7 +77,7 @@ class RadiationTables //! \param[in] xi //! 
\param[in] table_min_photon_chi //! \param[in] table_xi -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc routine seq #endif double computeRandomPhotonChiWithInterpolation( double particle_chi, @@ -95,7 +95,7 @@ class RadiationTables //! from the computed table niel_.table //! \param particle_chi particle quantum parameter -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc routine seq #endif double getHNielFromTable( double particle_chi, double * tableNiel); @@ -116,7 +116,7 @@ class RadiationTables //! \param particle_chi particle quantum parameter //! \param dt time step //#pragma omp declare simd -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc routine seq #endif inline double __attribute__((always_inline)) getRidgersCorrectedRadiatedEnergy( const double particle_chi, @@ -138,7 +138,7 @@ class RadiationTables //! Get of the classical continuous radiated energy during dt //! \param particle_chi particle quantum parameter //! \param dt time step -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc routine seq #endif inline double __attribute__((always_inline)) getClassicalRadiatedEnergy( double particle_chi, double dt ) @@ -148,7 +148,7 @@ class RadiationTables //! Return the minimum_chi_discontinuous_ value //! Under this value, no discontinuous radiation reaction -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc routine seq #endif inline double __attribute__((always_inline)) getMinimumChiDiscontinuous() @@ -158,7 +158,7 @@ class RadiationTables //! Return the minimum_chi_continuous_ value //! Under this value, no continuous radiation reaction -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc routine seq #endif inline double __attribute__((always_inline)) getMinimumChiContinuous() diff --git a/src/Radiation/RadiationTools.h b/src/Radiation/RadiationTools.h index 33cb5f501..1746c894e 100644 --- a/src/Radiation/RadiationTools.h +++ b/src/Radiation/RadiationTools.h @@ -32,7 +32,7 @@ class RadiationTools { //! Valid between particle_chi in 1E-3 and 1E1 //! \param particle_chi particle quantum parameter // ----------------------------------------------------------------------------- -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc routine seq #endif static inline double __attribute__((always_inline)) getHNielFitOrder10(double particle_chi) @@ -62,7 +62,7 @@ class RadiationTools { //! Valid between particle_chi in 1E-3 and 1E1 //! \param particle_chi particle quantum parameter // ----------------------------------------------------------------------------- -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc routine seq #endif static inline double __attribute__((always_inline)) getHNielFitOrder5(double particle_chi) @@ -86,7 +86,7 @@ class RadiationTools { //! Ridgers et al., ArXiv 1708.04511 (2017) //! \param particle_chi particle quantum parameter // ----------------------------------------------------------------------------- -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc routine seq #endif static inline double __attribute__((always_inline)) getHNielFitRidgers(double particle_chi) @@ -104,7 +104,7 @@ class RadiationTools { //! approximation formulae //! 
\param particle_chi particle quantum parameter //#pragma omp declare simd -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc routine seq #endif static inline double __attribute__((always_inline)) computeGRidgers(double particle_chi) @@ -117,7 +117,7 @@ class RadiationTools { //! Return f1(nu) = Int_nu^\infty K_{5/3}(y) dy //! used in computed synchrotron power spectrum // ----------------------------------------------------------------------------- -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc routine seq #endif static inline double __attribute__((always_inline)) computeF1Nu(double nu) @@ -155,7 +155,7 @@ class RadiationTools { //! Return f2(nu) = BesselK_{2/3}(nu) //! used in computed synchrotron power spectrum // ----------------------------------------------------------------------------- -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc routine seq #endif static inline double __attribute__((always_inline)) computeF2Nu(double nu) @@ -194,7 +194,7 @@ class RadiationTools { //! = Int_nu^\infty K_{5/3}(y) dy + cst * BesselK_{2/3}(nu) //! used in computed synchrotron power spectrum // ----------------------------------------------------------------------------- -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc routine seq #endif static inline double __attribute__((always_inline)) computeBesselPartsRadiatedPower(double nu, double cst) diff --git a/src/Radiation/Table.h b/src/Radiation/Table.h index 8b74aeeaa..a028d4df3 100644 --- a/src/Radiation/Table.h +++ b/src/Radiation/Table.h @@ -45,7 +45,7 @@ class Table void compute_parameters(); //! get value using linear interpolation at position x -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc routine seq #endif double get(double x); diff --git a/src/Smilei.cpp b/src/Smilei.cpp index 15cd7b047..81ba6c258 100755 --- a/src/Smilei.cpp +++ b/src/Smilei.cpp @@ -20,7 +20,7 @@ #include #include #include -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #include #endif @@ -44,7 +44,7 @@ using namespace std; // MAIN CODE // --------------------------------------------------------------------------------------------------------------------- -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #ifdef _OPENACC void initialization_openacc() { @@ -80,7 +80,7 @@ int main( int argc, char *argv[] ) // ------------------------- // Create the OpenACC environment -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC initialization_openacc(); #endif @@ -124,7 +124,7 @@ int main( int argc, char *argv[] ) // oblivious to the program (only one, the one by default). // This could be a missed but very advanced optimization for some // kernels/exchange. - ERROR( "Simlei needs only one accelerator (GPU). Look for HIP_VISIBLE_DEVICES or 'gpu-bind=closest' in your SLURM script or use a custom binding script." ); + ERROR( "Smilei needs only one accelerator (GPU). Look for HIP_VISIBLE_DEVICES or 'gpu-bind=closest' in your SLURM script or use a custom binding script." 
); } else { // ::omp_set_default_device(0); } @@ -248,7 +248,7 @@ int main( int argc, char *argv[] ) checkpoint.restartAll( vecPatches, region, &smpi, params ); -#if !defined( SMILEI_ACCELERATOR_MODE ) +#if !defined( SMILEI_ACCELERATOR_GPU ) // CPU only, its too early to sort on GPU vecPatches.initialParticleSorting( params ); #endif @@ -271,7 +271,7 @@ int main( int argc, char *argv[] ) PatchesFactory::createVector( vecPatches, params, &smpi, openPMD, &radiation_tables_, 0 ); -#if !(defined( SMILEI_ACCELERATOR_MODE )) +#if !(defined( SMILEI_ACCELERATOR_GPU )) // CPU only, its too early to sort on GPU vecPatches.initialParticleSorting( params ); #endif @@ -407,7 +407,7 @@ int main( int argc, char *argv[] ) } } -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) TITLE( "GPU allocation and copy of the fields and particles" ); // Allocate particle and field arrays // Also copy particle array content on device @@ -629,7 +629,7 @@ int main( int argc, char *argv[] ) #pragma omp parallel shared (time_dual,smpi,params, vecPatches, region, simWindow, checkpoint, itime) { // finalize particle exchanges and sort particles - vecPatches.finalizeAndSortParticles( params, &smpi, simWindow, + vecPatches.finalizeExchParticlesAndSort( params, &smpi, simWindow, time_dual, timers, itime ); // Particle merging @@ -685,7 +685,7 @@ int main( int argc, char *argv[] ) } //End omp parallel region if( params.has_load_balancing && params.load_balancing_time_selection->theTimeIsNow( itime ) ) { -// #if defined( SMILEI_ACCELERATOR_MODE ) +// #if defined( SMILEI_ACCELERATOR_GPU ) // ERROR( "Load balancing not tested on GPU !" ); // #endif count_dlb++; @@ -777,7 +777,7 @@ int main( int argc, char *argv[] ) region.clean(); } -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) vecPatches.cleanDataOnDevice( params, &smpi, &radiation_tables_, &multiphoton_Breit_Wheeler_tables_ ); #endif diff --git a/src/SmileiMPI/AsyncMPIbuffers.cpp b/src/SmileiMPI/AsyncMPIbuffers.cpp index 0f7cebe9d..ff8efb17f 100755 --- a/src/SmileiMPI/AsyncMPIbuffers.cpp +++ b/src/SmileiMPI/AsyncMPIbuffers.cpp @@ -1,5 +1,6 @@ #include "AsyncMPIbuffers.h" +#include "ParticlesFactory.h" #include "Field.h" #include "Patch.h" @@ -66,30 +67,47 @@ SpeciesMPIbuffers::SpeciesMPIbuffers() SpeciesMPIbuffers::~SpeciesMPIbuffers() { + for( size_t i=0 ; i > partRecv; + std::vector< std::vector > partRecv; //! ndim vectors of 2 received packets of particles (1 per direction) - std::vector< std::vector > partSend; + std::vector< std::vector > partSend; - //! ndim vectors of 2 vectors of index particles to send (1 per direction) - //! - not sent - // - used to sort Species::indexes_of_particles_to_exchange built in Species::dynamics - std::vector< std::vector< std::vector > > part_index_send; //! ndim vectors of 2 numbers of particles to send (1 per direction) - std::vector< std::vector< unsigned int > > part_index_send_sz; + std::vector< std::vector< unsigned int > > partSendSize; //! 
ndim vectors of 2 numbers of particles to receive (1 per direction) - std::vector< std::vector< unsigned int > > part_index_recv_sz; + std::vector< std::vector< unsigned int > > partRecvSize; }; diff --git a/src/SmileiMPI/SmileiMPI.cpp b/src/SmileiMPI/SmileiMPI.cpp index c35a69fe9..5e3a6b2da 100755 --- a/src/SmileiMPI/SmileiMPI.cpp +++ b/src/SmileiMPI/SmileiMPI.cpp @@ -763,7 +763,7 @@ void SmileiMPI::isend_species( Patch *patch, int to, int &irequest, int tag, Par irequest ++; } -#if defined( SMILEI_ACCELERATOR_MODE) +#if defined( SMILEI_ACCELERATOR_GPU) // For the particles for( unsigned int ispec=0; ispecvecSpecies[ispec]->particles, from, tag+2*ispec, recvParts ); MPI_Type_free( &( recvParts ) ); } - patch->vecSpecies[ispec]->particles->initializeDataOnDevice(); - patch->vecSpecies[ispec]->particles_to_move->initializeDataOnDevice(); + patch->vecSpecies[ispec]->allocateParticlesOnDevice(); } @@ -1210,7 +1209,7 @@ void SmileiMPI::send_PML(ElectroMagn *EM, Tpml embc, int bcId, int to, int &ire void SmileiMPI::isend( ElectroMagn *EM, int to, int &irequest, vector &requests, int tag, bool send_xmax_bc ) { -// #if defined (SMILEI_ACCELERATOR_MODE) +// #if defined (SMILEI_ACCELERATOR_GPU) // isendOnDevice( EM->Ex_, to, tag+irequest, requests[irequest] ); // irequest++; @@ -1319,9 +1318,9 @@ void SmileiMPI::isend( ElectroMagn *EM, int to, int &irequest, vector( EM->emBoundCond[bcId] ) ) { ElectroMagnBC1D_SM *embc = static_cast( EM->emBoundCond[bcId] ); - MPI_Isend( &( embc->By_val ), 1, MPI_DOUBLE, to, tag+irequest, MPI_COMM_WORLD, &requests[irequest] ); + MPI_Isend( &( embc->By_val_ ), 1, MPI_DOUBLE, to, tag+irequest, MPI_COMM_WORLD, &requests[irequest] ); irequest++; - MPI_Isend( &( embc->Bz_val ), 1, MPI_DOUBLE, to, tag+irequest, MPI_COMM_WORLD, &requests[irequest] ); + MPI_Isend( &( embc->Bz_val_ ), 1, MPI_DOUBLE, to, tag+irequest, MPI_COMM_WORLD, &requests[irequest] ); irequest++; } else if( dynamic_cast( EM->emBoundCond[bcId] ) ) { // BCs at the x-border @@ -1746,7 +1745,7 @@ int SmileiMPI::recv_PML(ElectroMagn *EM, Tpml embc, int bcId, int from, int tag void SmileiMPI::recv( ElectroMagn *EM, int from, int &tag, bool recv_xmin_bc ) { -// #if defined (SMILEI_ACCELERATOR_MODE) +// #if defined (SMILEI_ACCELERATOR_GPU) // recvOnDevice( EM->Ex_, from, tag ); // tag++; @@ -1855,9 +1854,9 @@ void SmileiMPI::recv( ElectroMagn *EM, int from, int &tag, bool recv_xmin_bc ) if( dynamic_cast( EM->emBoundCond[bcId] ) ) { ElectroMagnBC1D_SM *embc = static_cast( EM->emBoundCond[bcId] ); MPI_Status status; - MPI_Recv( &( embc->By_val ), 1, MPI_DOUBLE, from, tag, MPI_COMM_WORLD, &status ); + MPI_Recv( &( embc->By_val_ ), 1, MPI_DOUBLE, from, tag, MPI_COMM_WORLD, &status ); tag++; - MPI_Recv( &( embc->Bz_val ), 1, MPI_DOUBLE, from, tag, MPI_COMM_WORLD, &status ); + MPI_Recv( &( embc->Bz_val_ ), 1, MPI_DOUBLE, from, tag, MPI_COMM_WORLD, &status ); tag++; } else if( dynamic_cast( EM->emBoundCond[bcId] ) ) { // BCs at the x-border @@ -2122,7 +2121,7 @@ void SmileiMPI::isend( Field *field, int to, int tag, MPI_Request &request ) } // End isend ( Field ) -#if defined (SMILEI_ACCELERATOR_MODE) +#if defined (SMILEI_ACCELERATOR_GPU) //! 
Sends the whole Field Device to Device (assuming MPI enables it) void SmileiMPI::isendOnDevice( Field *field, int to, int tag, MPI_Request &request ) { @@ -2195,7 +2194,7 @@ void SmileiMPI::recv( Field *field, int from, int tag ) } // End recv ( Field ) -#if defined (SMILEI_ACCELERATOR_MODE) +#if defined (SMILEI_ACCELERATOR_GPU) void SmileiMPI::recvOnDevice( Field *field, int from, int tag ) { @@ -2525,7 +2524,7 @@ void SmileiMPI::eraseBufferParticleTrail( const int ndim, const int istart, cons } -#if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined( SMILEI_ACCELERATOR_GPU_OACC ) template static inline void diff --git a/src/SmileiMPI/SmileiMPI.h b/src/SmileiMPI/SmileiMPI.h index 13cacc416..2785921de 100755 --- a/src/SmileiMPI/SmileiMPI.h +++ b/src/SmileiMPI/SmileiMPI.h @@ -103,7 +103,7 @@ class SmileiMPI //! Sends the whole Field void isend( Field *field, int to, int tag, MPI_Request &request ); //! Sends the whole Field Device to Device (assuming MPI enables it) -#if defined (SMILEI_ACCELERATOR_MODE) +#if defined (SMILEI_ACCELERATOR_GPU) void isendOnDevice( Field *field, int to, int tag, MPI_Request &request ); #endif @@ -114,7 +114,7 @@ class SmileiMPI //! Receives the whole Field void recv( Field *field, int from, int tag); //! Receives the whole Field Device to Device (assuming MPI enables it) -#if defined (SMILEI_ACCELERATOR_MODE) +#if defined (SMILEI_ACCELERATOR_GPU) void recvOnDevice( Field *field, int from, int tag); #endif @@ -248,7 +248,7 @@ class SmileiMPI //! Erase Particles from istart ot the end in the buffers of thread ithread void eraseBufferParticleTrail( const int ndim, const int istart, const int ithread, bool isAM = false ); -#if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined( SMILEI_ACCELERATOR_GPU_OACC ) //! Map CPU buffers onto the GPU to at least accommodate particle_count //! particles. This method tries to reduce the number of //! 
allocation/deallocation which produces a lot of fragmentation on some diff --git a/src/Species/Species.cpp b/src/Species/Species.cpp index 37462566f..7555cb778 100755 --- a/src/Species/Species.cpp +++ b/src/Species/Species.cpp @@ -90,7 +90,6 @@ Species::Species( Params ¶ms, Patch *patch ) : { // &particles_sorted[0] particles = ParticlesFactory::create( params, *patch ); - particles_to_move = ParticlesFactory::create( params, *patch ); regular_number_array_.clear(); partBoundCond = NULL; @@ -104,7 +103,7 @@ Species::Species( Params ¶ms, Patch *patch ) : dx_inv_[1] = 1./cell_length[1]; dx_inv_[2] = 1./cell_length[2]; - initCluster( params ); + initCluster( params, patch ); inv_nDim_particles = 1./( ( double )nDim_particle ); length_[0]=0; @@ -123,7 +122,7 @@ Species::Species( Params ¶ms, Patch *patch ) : }//END Species creator -void Species::initCluster( Params ¶ms ) +void Species::initCluster( Params ¶ms, Patch *patch ) { // NOTE: On GPU we dont use first_index, it would contain redundant data but // we are forced to initialize it due to ParticleCreator::create() and the @@ -252,7 +251,7 @@ void Species::initCluster( Params ¶ms ) #endif //Initialize specMPI - MPI_buffer_.allocate( nDim_field ); + MPI_buffer_.allocate( params, patch ); //ener_tot = 0.; nrj_bc_lost = 0.; @@ -378,18 +377,14 @@ void Species::initOperators( Params ¶ms, Patch *patch ) partBoundCond = new PartBoundCond( params, this, patch ); for( unsigned int iDim=0 ; iDim < nDim_field ; iDim++ ) { for( unsigned int iNeighbor=0 ; iNeighbor<2 ; iNeighbor++ ) { - MPI_buffer_.partRecv[iDim][iNeighbor].initialize( 0, ( *particles ) ); - MPI_buffer_.partSend[iDim][iNeighbor].initialize( 0, ( *particles ) ); - MPI_buffer_.part_index_send[iDim][iNeighbor].resize( 0 ); - MPI_buffer_.part_index_recv_sz[iDim][iNeighbor] = 0; - MPI_buffer_.part_index_send_sz[iDim][iNeighbor] = 0; + MPI_buffer_.partRecv[iDim][iNeighbor]->initialize( 0, ( *particles ) ); + MPI_buffer_.partSend[iDim][iNeighbor]->initialize( 0, ( *particles ) ); } } typePartSend.resize( nDim_field*2, MPI_DATATYPE_NULL ); typePartRecv.resize( nDim_field*2, MPI_DATATYPE_NULL ); exchangePatch = MPI_DATATYPE_NULL; - particles_to_move->initialize( 0, *particles ); } @@ -399,7 +394,6 @@ void Species::initOperators( Params ¶ms, Patch *patch ) Species::~Species() { delete particles; - delete particles_to_move; delete Push; delete Interp; @@ -506,7 +500,7 @@ Species::~Species() } -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) //! 
Prepare the species Current and Rho grids on Device void Species::prepareSpeciesCurrentAndChargeOnDevice( @@ -546,7 +540,7 @@ Species::prepareSpeciesCurrentAndChargeOnDevice( } -#if defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present( Jx_s[0:Jx_size], \ Jy_s[0:Jy_size], \ Jz_s[0:Jz_size], \ @@ -557,7 +551,7 @@ Species::prepareSpeciesCurrentAndChargeOnDevice( #if defined( SMILEI_ACCELERATOR_GPU_OMP ) #pragma omp target #pragma omp teams distribute parallel for -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc loop gang worker vector #endif for( unsigned int i=0 ; iinitializeDataOnDevice(); + + // The first send/recv buffers are also on device + MPI_buffer_.partSend[0][0]->initializeDataOnDevice(); + MPI_buffer_.partSend[0][1]->initializeDataOnDevice(); + MPI_buffer_.partRecv[0][0]->initializeDataOnDevice(); + MPI_buffer_.partRecv[0][1]->initializeDataOnDevice(); + + // Create photon species on the device + if( radiation_model_ == "mc" && photon_species_ ) { + radiated_photons_->initializeDataOnDevice(); + } + + // Create pair species on the device + if( mBW_pair_species_[0] && mBW_pair_species_[1] ) { + mBW_pair_particles_[0]->initializeDataOnDevice(); + mBW_pair_particles_[1]->initializeDataOnDevice(); + } +} + + //! Copy particles from host to device void Species::copyParticlesFromHostToDevice() @@ -641,7 +659,7 @@ Species::copyParticlesFromHostToDevice() particles->copyFromHostToDevice(); } -#endif // end if SMILEI_ACCELERATOR_MODE +#endif // end if SMILEI_ACCELERATOR_GPU // --------------------------------------------------------------------------------------------------------------------- //! Method calculating the Particle dynamics (interpolation, pusher, projection and more) @@ -682,7 +700,7 @@ void Species::dynamics( double time_dual, if( time_dual>time_frozen_ || Ionize) { // moving particle // Prepare temporary buffers for this iteration -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) smpi->resizeDeviceBuffers( ithread, nDim_field, particles->numberOfParticles() ); @@ -695,7 +713,7 @@ void Species::dynamics( double time_dual, patch->startFineTimer(mBW_timer_id_); -#if defined( SMILEI_OPENACC_MODE) +#if defined( SMILEI_ACCELERATOR_GPU_OACC) static_cast(mBW_pair_particles_[0])->deviceResize( particles->deviceSize() * Multiphoton_Breit_Wheeler_process->getPairCreationSampling(0) ); static_cast(mBW_pair_particles_[0])->resetCellKeys(); static_cast(mBW_pair_particles_[1])->deviceResize( particles->deviceSize() * Multiphoton_Breit_Wheeler_process->getPairCreationSampling(1) ); @@ -708,7 +726,7 @@ void Species::dynamics( double time_dual, patch->stopFineTimer(mBW_timer_id_); } -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) // Make sure some bin preconditions are respected SMILEI_ASSERT( particles->first_index.size() == 1 ); SMILEI_ASSERT( particles->last_index.size() >= 1 ); @@ -814,7 +832,7 @@ void Species::dynamics( double time_dual, // Compression of the bins if necessary if( Multiphoton_Breit_Wheeler_process ) { -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC removeTaggedParticles(smpi, &particles->first_index[0], &particles->last_index[0], @@ -1672,14 +1690,14 @@ void Species::dynamicsImportParticles( double time_dual, Params ¶ms, Patch * // Radiation losses if( Radiate && photon_species_ ) { // If creation of macro-photon, we add them to photon_species -#ifdef SMILEI_OPENACC_MODE +#ifdef 
SMILEI_ACCELERATOR_GPU_OACC // We first erase empty slots in the buffer of photons // radiation_photons_->cell_keys is used as a mask static_cast(radiated_photons_)->eraseLeavingParticles(); #endif photon_species_->importParticles( params, patch, *radiated_photons_, localDiags, time_dual ); -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC // We explicitely clear the device Particles static_cast(radiated_photons_)->deviceClear(); #endif @@ -1691,7 +1709,7 @@ void Species::dynamicsImportParticles( double time_dual, Params ¶ms, Patch * // Addition of the electron-positron particles for( int k=0; k<2; k++ ) { -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC // We first erase empty slots in the buffer of photons // radiation_photons_->cell_keys is used as a mask static_cast(mBW_pair_particles_[k])->eraseLeavingParticles(); @@ -1699,7 +1717,7 @@ void Species::dynamicsImportParticles( double time_dual, Params ¶ms, Patch * mBW_pair_species_[k]->importParticles( params, patch, *mBW_pair_particles_[k], localDiags, time_dual ); -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC // We explicitely clear the device Particles static_cast(mBW_pair_particles_[k])->deviceClear(); #endif @@ -1747,53 +1765,32 @@ void Species::computeCharge( ElectroMagn *EMfields, bool old /*=false*/ ) }//END computeCharge -void Species::extractParticles() -{ - particles->extractParticles( particles_to_move ); -} - -// void Species::injectParticles( Params ¶ms ) -// { -// } - - // --------------------------------------------------------------------------------------------------------------------- //! Sort particles // --------------------------------------------------------------------------------------------------------------------- void Species::sortParticles( Params ¶ms ) { -#if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined( SMILEI_ACCELERATOR_GPU_OACC ) // ----------------------------- // GPU version - - // particles_to_move contains, up to here, send particles - // clean it to manage recv particles - particles_to_move->clear(); // Clear on the host - // Merge all MPI_buffer_.partRecv in particles_to_move - for( int idim = 0; idim < params.nDim_field; idim++ ) { - for( int iNeighbor = 0; iNeighbor < 2; iNeighbor++ ) { - int n_part_recv = MPI_buffer_.part_index_recv_sz[idim][iNeighbor]; - if( ( n_part_recv != 0 ) ) { - // insert n_part_recv in particles_to_move from 0 - MPI_buffer_.partRecv[idim][iNeighbor].copyParticles( 0, - n_part_recv, - *particles_to_move, - particles_to_move->size() ); + + // Merge all MPI_buffer_.partRecv in the first one + Particles * first_buffer = MPI_buffer_.partRecv[0][0]; + for( auto &partRecvs: MPI_buffer_.partRecv ) { + for( auto partRecv: partRecvs ) { + if( partRecv != first_buffer && partRecv->size() > 0 ) { + partRecv->copyParticles( 0, partRecv->size(), *first_buffer, first_buffer->size() ); + partRecv->clear(); } } } - - particles_to_move->copyFromHostToDevice(); - - // // Erase particles that leaves this patch - // particles->last_index[0] = particles->eraseLeavingParticles(); - // - // // Inject newly arrived particles in particles_to_move - // particles->last_index[0] += particles->injectParticles( particles_to_move ); - - particles->importAndSortParticles( particles_to_move ); + + first_buffer->copyFromHostToDevice(); + + particles->importAndSortParticles( first_buffer ); + #else // -------------------------- @@ -1804,28 +1801,10 @@ void Species::sortParticles( 
Params ¶ms ) int ndim = params.nDim_field; int idim; - // Compute total number of particles received - // int total_number_part_recv = 0; - //Merge all MPI_buffer_.partRecv in particles_to_move - // for( int idim = 0; idim < ndim; idim++ ) { - // for( int iNeighbor=0 ; iNeighbor<2 ; iNeighbor++ ) { - // int n_part_recv = MPI_buffer_.part_index_recv_sz[idim][iNeighbor]; - // if( ( n_part_recv!=0 ) ) { - // // insert n_part_recv in particles_to_move from 0 - // //MPI_buffer_.partRecv[idim][iNeighbor].copyParticles( 0, n_part_recv, *particles_to_move, 0 ); - // total_number_part_recv += n_part_recv; - // //particles->last_index[particles->last_index.size()-1] += n_part_recv; - // //particles->cell_keys.resize(particles->cell_keys.size()+n_part_recv); - // } - // } - // } - //cout << "\t Species id : " << species_number_ << " - nparticles recv : " << blabla << endl; - - // Sort to adapt do cell_keys usage std::vector indexes_of_particles_to_exchange; for ( int ipart=0 ; ipart< (int)(getNbrOfParticles()) ; ipart++ ) { - if ( particles->cell_keys[ipart] == -1 ) { + if ( particles->cell_keys[ipart] < 0 ) { indexes_of_particles_to_exchange.push_back( ipart ); } } @@ -1900,15 +1879,15 @@ void Species::sortParticles( Params ¶ms ) //Evaluation of the necessary shift of all bins.2 //idim=0 - shift[1] += MPI_buffer_.part_index_recv_sz[0][0];//Particles coming from xmin all go to bin 0 and shift all the other bins. - shift[particles->last_index.size()] += MPI_buffer_.part_index_recv_sz[0][1];//Used only to count the total number of particles arrived. + shift[1] += MPI_buffer_.partRecv[0][0]->size();//Particles coming from xmin all go to bin 0 and shift all the other bins. + shift[particles->last_index.size()] += MPI_buffer_.partRecv[0][1]->size();//Used only to count the total number of particles arrived. //idim>0 for( idim = 1; idim < ndim; idim++ ) { for( int iNeighbor=0 ; iNeighborsize(); for( unsigned int j=0; j<( unsigned int )n_part_recv ; j++ ) { //We first evaluate how many particles arrive in each bin. - ii = int( ( MPI_buffer_.partRecv[idim][iNeighbor].position( 0, j )-min_loc )/dbin ); //bin in which the particle goes. + ii = int( ( MPI_buffer_.partRecv[idim][iNeighbor]->position( 0, j )-min_loc )/dbin ); //bin in which the particle goes. shift[ii+1]++; // It makes the next bins shift. } } @@ -1943,11 +1922,11 @@ void Species::sortParticles( Params ¶ms ) //Space has been made now to write the arriving particles into the correct bins //idim == 0 is the easy case, when particles arrive either in first or last bin. for( int iNeighbor=0 ; iNeighborsize(); //if ( (neighbor_[0][iNeighbor]!=MPI_PROC_NULL) && (n_part_recv!=0) ) { if( ( n_part_recv!=0 ) ) { ii = iNeighbor*( particles->last_index.size()-1 ); //0 if iNeighbor=0(particles coming from Xmin) and particles->last_index.size()-1 otherwise. 
- MPI_buffer_.partRecv[0][iNeighbor].overwriteParticle( 0, *particles, particles->last_index[ii], n_part_recv ); + MPI_buffer_.partRecv[0][iNeighbor]->overwriteParticle( 0, *particles, particles->last_index[ii], n_part_recv ); particles->last_index[ii] += n_part_recv ; } } @@ -1955,12 +1934,12 @@ void Species::sortParticles( Params ¶ms ) for( idim = 1; idim < ndim; idim++ ) { //if (idim!=iDim) continue; for( int iNeighbor=0 ; iNeighborsize(); //if ( (neighbor_[idim][iNeighbor]!=MPI_PROC_NULL) && (n_part_recv!=0) ) { if( ( n_part_recv!=0 ) ) { for( unsigned int j=0; j<( unsigned int )n_part_recv; j++ ) { - ii = int( ( MPI_buffer_.partRecv[idim][iNeighbor].position( 0, j )-min_loc )/dbin ); //bin in which the particle goes. - MPI_buffer_.partRecv[idim][iNeighbor].overwriteParticle( j, *particles, particles->last_index[ii] ); + ii = int( ( MPI_buffer_.partRecv[idim][iNeighbor]->position( 0, j )-min_loc )/dbin ); //bin in which the particle goes. + MPI_buffer_.partRecv[idim][iNeighbor]->overwriteParticle( j, *particles, particles->last_index[ii] ); particles->last_index[ii] ++ ; } } @@ -2117,15 +2096,16 @@ void Species::countSortParticles( Params ¶ms ) // Move all particles from another species to this one void Species::importParticles( Params ¶ms, Patch *patch, Particles &source_particles, vector &localDiags, double time_dual, Ionization *I ) { -#if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined( SMILEI_ACCELERATOR_GPU_OACC ) // --------------------------------------------------- // GPU version // Warning: the GPU version does not handle bin and sorting // Warning: the current GPU version does not handle tracked particles // Inject particles from source_particles - particles->last_index.back() += particles->injectParticles( &source_particles ); + particles->last_index.back() += particles->addParticles( &source_particles ); particles->last_index[0] = particles->last_index.back(); + source_particles.resize( 0 ); #else // --------------------------------------------------- @@ -2228,7 +2208,7 @@ void Species::compress(SmileiMPI *smpi, int ithread, bool compute_cell_keys) { const int nparts = smpi->dynamics_Epart[ithread].size()/3; -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC double *const __restrict__ weight = particles->getPtrWeight(); @@ -2267,7 +2247,7 @@ void Species::compress(SmileiMPI *smpi, int ithread, bool compute_cell_keys) { const int nbin = particles->numberOfBins(); -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc parallel \ present(Ex[0:nparts],Ey[0:nparts],Ez[0:nparts], \ Bx[0:nparts], By[0:nparts], Bz[0:nparts], \ @@ -2312,7 +2292,7 @@ void Species::compress(SmileiMPI *smpi, int ithread, bool compute_cell_keys) { if (copy_particle_number>0) { -#ifndef SMILEI_OPENACC_MODE +#ifndef SMILEI_ACCELERATOR_GPU_OACC particles->overwriteParticle(copy_first_index, particles->last_index[ibin], copy_particle_number, compute_cell_keys ); #else for (auto ipart = 0 ; ipart < copy_particle_number ; ipart ++) { @@ -2367,7 +2347,7 @@ void Species::compress(SmileiMPI *smpi, int ithread, bool compute_cell_keys) { } } -#ifndef SMILEI_OPENACC_MODE +#ifndef SMILEI_ACCELERATOR_GPU_OACC if (thetaold) { for( unsigned int ipart = 0 ; ipart < copy_particle_number ; ipart ++ ) { thetaold[copy_first_index + ipart] = thetaold[particles->last_index[ibin] + ipart]; @@ -2405,7 +2385,7 @@ void Species::compress(SmileiMPI *smpi, int ithread, bool compute_cell_keys) { } } -#ifdef 
SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC } // end parallel region #endif @@ -2439,7 +2419,7 @@ void Species::removeTaggedParticlesPerBin( // Weight shortcut double *const __restrict__ weight = particles->getPtrWeight(); -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC double *const __restrict__ position_x = particles->getPtrPosition( 0 ); double *const __restrict__ position_y = nDim_particle > 1 ? particles->getPtrPosition( 1 ) : nullptr; double *const __restrict__ position_z = nDim_particle > 2 ? particles->getPtrPosition( 2 ) : nullptr; @@ -2457,7 +2437,7 @@ void Species::removeTaggedParticlesPerBin( // Total number of bins / cells const int nbin = particles->numberOfBins(); -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc parallel \ present(Epart[0:nparts*3],\ Bpart[0:nparts*3], \ @@ -2499,7 +2479,7 @@ void Species::removeTaggedParticlesPerBin( if( ipart < last_photon_index ) { // The last existing photon comes to the position of // the deleted photon -#ifndef SMILEI_OPENACC_MODE +#ifndef SMILEI_ACCELERATOR_GPU_OACC particles->overwriteParticle( last_photon_index, ipart, compute_cell_keys ); #else weight[ipart] = weight[last_photon_index]; @@ -2533,7 +2513,7 @@ void Species::removeTaggedParticlesPerBin( } gamma[ipart] = gamma[0*nparts+last_photon_index]; -#ifndef SMILEI_OPENACC_MODE +#ifndef SMILEI_ACCELERATOR_GPU_OACC if (thetaold) { thetaold[0*nparts+ipart] = thetaold[0*nparts+last_photon_index]; } @@ -2560,13 +2540,14 @@ void Species::removeTaggedParticlesPerBin( } // if last_index[ibin] > first_index[ibin] } // end loop over the bins -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC } // end parallel region #endif } //! This method removes particles with a negative weight //! when a single bin is used +#ifdef SMILEI_ACCELERATOR_GPU_OACC void Species::removeTaggedParticles( SmileiMPI *smpi, int *const first_index, @@ -2575,8 +2556,6 @@ void Species::removeTaggedParticles( bool compute_cell_keys) { -#ifdef SMILEI_OPENACC_MODE - unsigned int new_n_parts = 0; unsigned int nb_deleted = 0; @@ -2644,7 +2623,7 @@ void Species::removeTaggedParticles( // that will not be erased // Backward loop over the tagged particles to fill holes in the photon particle array (at the bin level only) -//#ifdef SMILEI_OPENACC_MODE +//#ifdef SMILEI_ACCELERATOR_GPU_OACC // #pragma acc loop seq //#endif for( int ipart=last_moving_index-1 ; ipart>=*first_index; ipart-- ) { @@ -2721,9 +2700,9 @@ void Species::removeTaggedParticles( } } // if nparts > 0 +} #endif -} // ------------------------------------------------ // Set position when using restart & moving window diff --git a/src/Species/Species.h b/src/Species/Species.h index 56c693d65..d4af3bf9d 100755 --- a/src/Species/Species.h +++ b/src/Species/Species.h @@ -6,7 +6,7 @@ // #include "PyTools.h" #include "Particles.h" -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #include "nvidiaParticles.h" #endif #include "Params.h" @@ -147,8 +147,6 @@ class Species //! Vector containing all Particles of the considered Species Particles *particles; - //! Data structure through which passes particles which move from one patch to another - Particles *particles_to_move; Particles particles_sorted[2]; //std::vector index_of_particles_to_exchange; @@ -344,7 +342,7 @@ class Species // ----------------------------------------------------------------------------- // 5. 
Methods - virtual void initCluster( Params & ); + virtual void initCluster( Params &, Patch * ); virtual void resizeCluster( Params & ); @@ -384,7 +382,9 @@ class Species return particles->capacity(); } -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) + + void allocateParticlesOnDevice(); //! Copy particles from host to device void @@ -482,12 +482,6 @@ class Species //! Method calculating the Particle charge on the grid (projection) virtual void computeCharge( ElectroMagn *EMfields, bool old=false ); - //! Method used to select particles which will change of patches - virtual void extractParticles(); - - //! Method used to integrate particles which come from another patches - // virtual void injectParticles( Params ¶ms ); - //! Method used to inject and sort particles virtual void sortParticles( Params ¶m ); @@ -572,12 +566,14 @@ class Species //! This method removes particles with a negative weight //! when a single bin is used +#ifdef SMILEI_ACCELERATOR_GPU_OACC void removeTaggedParticles( SmileiMPI *smpi, int *const first_index, int *const last_index, int ithread, bool compute_cell_keys = false); +#endif //! Moving window boundary conditions managment void disableXmax(); diff --git a/src/Species/SpeciesV.cpp b/src/Species/SpeciesV.cpp index 98d5d9dbb..4a4199b63 100755 --- a/src/Species/SpeciesV.cpp +++ b/src/Species/SpeciesV.cpp @@ -46,7 +46,7 @@ using namespace std; SpeciesV::SpeciesV( Params ¶ms, Patch *patch ) : Species( params, patch ) { - initCluster( params ); + initCluster( params, patch ); npack_ = 0 ; packsize_ = 0; @@ -106,7 +106,7 @@ SpeciesV::~SpeciesV() } -void SpeciesV::initCluster( Params ¶ms ) +void SpeciesV::initCluster( Params ¶ms, Patch *patch ) { int ncells = 1; for( unsigned int iDim=0 ; iDimfirst_index[ipack*packsize_+scell] ; iPartlast_index[ipack*packsize_+scell]; iPart++ ) { - // if ( particles->cell_keys[iPart] != -1 ) { + // if ( particles->cell_keys[iPart] >= 0 ) { // //Compute cell_keys of remaining particles // for( unsigned int i = 0 ; icell_keys[iPart] *= length_[i]; @@ -552,7 +552,7 @@ void SpeciesV::dynamics( double time_dual, unsigned int ispec, // if( mass_>0 ) { // for( iPart=particles->first_index[ipack*packsize_+scell] ; iPartlast_index[ipack*packsize_+scell]; iPart++ ) { - // if ( particles->cell_keys[iPart] != -1 ) { + // if ( particles->cell_keys[iPart] >= 0 ) { // //Compute cell_keys of remaining particles // for( unsigned int i = 0 ; icell_keys[iPart] *= this->length_[i]; @@ -564,7 +564,7 @@ void SpeciesV::dynamics( double time_dual, unsigned int ispec, // } // for( iPart=particles->first_index[ipack*packsize_+scell] ; iPartlast_index[ipack*packsize_+scell]; iPart++ ) { - // if ( particles->cell_keys[iPart] != -1 ) { + // if ( particles->cell_keys[iPart] >= 0 ) { // //Compute cell_keys of remaining particles // for( unsigned int i = 0 ; icell_keys[iPart] *= this->length_[i]; @@ -1053,7 +1053,7 @@ void SpeciesV::dynamicsTasks( double time_dual, unsigned int ispec, if( mass_>0 ) { for( int scell = first_cell_of_bin[ibin] ; scell <= last_cell_of_bin[ibin] ; scell++ ) { for( int iPart=particles->first_index[ipack*packsize_+scell] ; ( int )iPartlast_index[ipack*packsize_+scell]; iPart++ ) { - if ( particles->cell_keys[iPart] != -1 ) { + if ( particles->cell_keys[iPart] >= 0 ) { //Compute cell_keys of remaining particles for( unsigned int i = 0 ; icell_keys[iPart] *= this->length_[i]; @@ -1067,7 +1067,7 @@ void SpeciesV::dynamicsTasks( double time_dual, unsigned int ispec, } else if( mass_==0 ) { for( int scell = 
first_cell_of_bin[ibin] ; scell <= last_cell_of_bin[ibin] ; scell++ ) { for( int iPart=particles->first_index[scell] ; ( int )iPartlast_index[scell]; iPart++ ) { - if ( particles->cell_keys[iPart] != -1 ) { + if ( particles->cell_keys[iPart] >= 0 ) { //Compute cell_keys of remaining particles for( unsigned int i = 0 ; icell_keys[iPart] *= length[i]; @@ -1366,27 +1366,27 @@ void SpeciesV::sortParticles( Params ¶ms ) //Loop over just arrived particles to compute their cell keys and contribution to count for( unsigned int idim=0; idim < nDim_field ; idim++ ) { for( unsigned int ineighbor=0 ; ineighbor < 2 ; ineighbor++ ) { - buf_cell_keys[idim][ineighbor].resize( MPI_buffer_.part_index_recv_sz[idim][ineighbor] ); + buf_cell_keys[idim][ineighbor].resize( MPI_buffer_.partRecv[idim][ineighbor]->size() ); // #pragma omp simd - // for( unsigned int ip=0; ip < MPI_buffer_.part_index_recv_sz[idim][ineighbor]; ip++ ) { + // for( unsigned int ip=0; ip < MPI_buffer_.partRecv[idim][ineighbor]->size(); ip++ ) { // for( unsigned int ipos=0; ipos < nDim_field ; ipos++ ) { - // double X = ((this)->*(distance[ipos]))(&MPI_buffer_.partRecv[idim][ineighbor], ipos, ip); + // double X = ((this)->*(distance[ipos]))(MPI_buffer_.partRecv[idim][ineighbor], ipos, ip); // int IX = round( X * dx_inv_[ipos] ); // buf_cell_keys[idim][ineighbor][ip] = buf_cell_keys[idim][ineighbor][ip] * length_[ipos] + IX; // } // } // // not vectorizable because random access to count - // for( unsigned int ip=0; ip < MPI_buffer_.part_index_recv_sz[idim][ineighbor]; ip++ ) { + // for( unsigned int ip=0; ip < MPI_buffer_.partRecv[idim][ineighbor]->size(); ip++ ) { // count[buf_cell_keys[idim][ineighbor][ip]] ++; // } computeParticleCellKeys( params, - &MPI_buffer_.partRecv[idim][ineighbor], + MPI_buffer_.partRecv[idim][ineighbor], &buf_cell_keys[idim][ineighbor][0], &count[0], 0, - MPI_buffer_.part_index_recv_sz[idim][ineighbor] ); + MPI_buffer_.partRecv[idim][ineighbor]->size() ); } } @@ -1403,8 +1403,8 @@ void SpeciesV::sortParticles( Params ¶ms ) //Now proceed to the cycle sort - if( MPI_buffer_.partRecv[0][0].size() == 0 ) { - MPI_buffer_.partRecv[0][0].initialize( 0, *particles ); //Is this correct ? + if( MPI_buffer_.partRecv[0][0]->size() == 0 ) { + MPI_buffer_.partRecv[0][0]->initialize( 0, *particles ); //Is this correct ? } // Resize the particle vector @@ -1418,7 +1418,7 @@ void SpeciesV::sortParticles( Params ¶ms ) //Copy all particles from MPI buffers back to the writable particles via cycle sort pass. for( unsigned int idim=0; idim < nDim_field ; idim++ ) { for( unsigned int ineighbor=0 ; ineighbor < 2 ; ineighbor++ ) { - for( unsigned int ip=0; ip < MPI_buffer_.part_index_recv_sz[idim][ineighbor]; ip++ ) { + for( unsigned int ip=0; ip < MPI_buffer_.partRecv[idim][ineighbor]->size(); ip++ ) { cycle.resize( 1 ); cell_target = buf_cell_keys[idim][ineighbor][ip]; ip_dest = particles->first_index[cell_target]; @@ -1429,7 +1429,7 @@ void SpeciesV::sortParticles( Params ¶ms ) cycle[0] = ip_dest; cell_target = particles->cell_keys[ip_dest]; //As long as the particle is not erased, we can build up the cycle. - while( cell_target != -1 ) { + while( cell_target >= 0 ) { ip_dest = particles->first_index[cell_target]; while( particles->cell_keys[ip_dest] == cell_target ) { ip_dest++; @@ -1441,7 +1441,7 @@ void SpeciesV::sortParticles( Params ¶ms ) //Last target_cell is -1, the particle must be erased: particles->translateParticles( cycle ); //Eventually copy particle from the MPI buffer into the particle vector. 
- MPI_buffer_.partRecv[idim][ineighbor].overwriteParticle( ip, *particles, cycle[0] ); + MPI_buffer_.partRecv[idim][ineighbor]->overwriteParticle( ip, *particles, cycle[0] ); } } } @@ -1450,14 +1450,14 @@ void SpeciesV::sortParticles( Params ¶ms ) for( unsigned int ip=( unsigned int )particles->last_index.back(); ip < npart; ip++ ) { cell_target = particles->cell_keys[ip]; - if( cell_target == -1 ) { + if( cell_target < 0 ) { continue; } cycle.resize( 0 ); cycle.push_back( ip ); //As long as the particle is not erased, we can build up the cycle. - while( cell_target != -1 ) { + while( cell_target >= 0 ) { ip_dest = particles->first_index[cell_target]; @@ -1533,7 +1533,7 @@ void SpeciesV::computeParticleCellKeys( Params & params, #pragma omp simd for( iPart=istart; iPart < iend ; iPart++ ) { - if ( cell_keys[iPart] != -1 ) { + if ( cell_keys[iPart] >= 0 ) { //Compute cell_keys particles cell_keys[iPart] = std::round( position_x[iPart] * dx_inv_[0]) - min_loc_l ; cell_keys[iPart] *= length_[1]; @@ -1553,7 +1553,7 @@ void SpeciesV::computeParticleCellKeys( Params & params, #pragma omp simd for( iPart=istart; iPart < iend ; iPart++ ) { - if ( cell_keys[iPart] != -1 ) { + if ( cell_keys[iPart] >= 0 ) { //Compute cell_keys of remaining particles cell_keys[iPart] = std::round(position_x[iPart] * dx_inv_[0] )- min_loc_x ; cell_keys[iPart] *= length_[1]; @@ -1573,7 +1573,7 @@ void SpeciesV::computeParticleCellKeys( Params & params, #pragma omp simd for( iPart=istart; iPart < iend ; iPart++ ) { - if ( cell_keys[iPart] != -1 ) { + if ( cell_keys[iPart] >= 0 ) { //Compute cell_keys of remaining particles cell_keys[iPart] = std::round(position_x[iPart] * dx_inv_[0] )- min_loc_x ; cell_keys[iPart] *= length_[1]; @@ -1589,7 +1589,7 @@ void SpeciesV::computeParticleCellKeys( Params & params, #pragma omp simd for( iPart=istart; iPart < iend ; iPart++ ) { - if ( cell_keys[iPart] != -1 ) { + if ( cell_keys[iPart] >= 0 ) { //Compute cell_keys of remaining particles cell_keys[iPart] = round(position_x[iPart] * dx_inv_[0] )- min_loc_x ; } @@ -1598,7 +1598,7 @@ void SpeciesV::computeParticleCellKeys( Params & params, } for( iPart=istart; iPart < iend ; iPart++ ) { - if ( cell_keys[iPart] != -1 ) { + if ( cell_keys[iPart] >= 0 ) { count[cell_keys[iPart]] ++; } } @@ -2526,7 +2526,7 @@ void SpeciesV::ponderomotiveUpdatePositionAndCurrentsTasks( double time_dual, un smpi->traceEventIfDiagTracing(diag_PartEventTracing, Tools::getOMPThreadNum(),0,11); for( int iPart=particles->first_index[scell] ; iPartlast_index[scell]; iPart++ ) { - if ( particles->cell_keys[iPart] != -1 ) { + if ( particles->cell_keys[iPart] >= 0 ) { //First reduction of the count sort algorithm. Lost particles are not included. for( int i = 0 ; i<( int )nDim_field; i++ ) { particles->cell_keys[iPart] *= length_[i]; diff --git a/src/Species/SpeciesV.h b/src/Species/SpeciesV.h index 39dc45089..7f5fe587c 100755 --- a/src/Species/SpeciesV.h +++ b/src/Species/SpeciesV.h @@ -26,7 +26,7 @@ class SpeciesV : public Species //! Species destructor virtual ~SpeciesV(); - void initCluster( Params ¶ms ) override; + void initCluster( Params ¶ms, Patch *patch ) override; //! 
Method calculating the Particle dynamics (interpolation, pusher, projection) void dynamics( double time, unsigned int ispec, diff --git a/src/Species/SpeciesVAdaptive.cpp b/src/Species/SpeciesVAdaptive.cpp index b24d86711..273362561 100755 --- a/src/Species/SpeciesVAdaptive.cpp +++ b/src/Species/SpeciesVAdaptive.cpp @@ -46,7 +46,7 @@ using namespace std; SpeciesVAdaptive::SpeciesVAdaptive( Params ¶ms, Patch *patch ) : SpeciesV( params, patch ) { - initCluster( params ); + initCluster( params, patch ); npack_ = 0 ; packsize_ = 0; }//END SpeciesVAdaptive creator @@ -275,7 +275,7 @@ void SpeciesVAdaptive::scalarDynamics( double time_dual, unsigned int ispec, // if( mass_>0 ) { // // for( iPart=particles->first_index[scell] ; ( int )iPartlast_index[scell]; iPart++ ) { - // if ( particles->cell_keys[iPart] != -1 ) { + // if ( particles->cell_keys[iPart] >= 0 ) { // //Compute cell_keys of remaining particles // for( unsigned int i = 0 ; icell_keys[iPart] *= this->length_[i]; @@ -289,7 +289,7 @@ void SpeciesVAdaptive::scalarDynamics( double time_dual, unsigned int ispec, // } else if( mass_==0 ) { // // for( iPart=particles->first_index[scell] ; ( int )iPartlast_index[scell]; iPart++ ) { - // if ( particles->cell_keys[iPart] != -1 ) { + // if ( particles->cell_keys[iPart] >= 0 ) { // //Compute cell_keys of remaining particles // for( unsigned int i = 0 ; icell_keys[iPart] *= this->length_[i]; @@ -754,7 +754,7 @@ void SpeciesVAdaptive::scalarDynamicsTasks( double time_dual, unsigned int ispec if( mass_>0 ) { for( int iPart=particles->first_index[ipack*packsize_+scell] ; ( int )iPartlast_index[ipack*packsize_+scell]; iPart++ ) { - if ( particles->cell_keys[iPart] != -1 ) { + if ( particles->cell_keys[iPart] >= 0 ) { //Compute cell_keys of remaining particles for( unsigned int i = 0 ; icell_keys[iPart] *= this->length_[i]; @@ -768,7 +768,7 @@ void SpeciesVAdaptive::scalarDynamicsTasks( double time_dual, unsigned int ispec } else if( mass_==0 ) { for( int iPart=particles->first_index[scell] ; ( int )iPartlast_index[scell]; iPart++ ) { - if ( particles->cell_keys[iPart] != -1 ) { + if ( particles->cell_keys[iPart] >= 0 ) { //Compute cell_keys of remaining particles for( unsigned int i = 0 ; icell_keys[iPart] *= length[i]; @@ -1662,7 +1662,7 @@ void SpeciesVAdaptive::scalarPonderomotiveUpdatePositionAndCurrentsTasks( double smpi->traceEventIfDiagTracing(diag_PartEventTracing, Tools::getOMPThreadNum(),0,11); for( int iPart=particles->first_index[first_cell_of_bin[ibin]] ; iPartlast_index[last_cell_of_bin[ibin]]; iPart++ ) { - if ( particles->cell_keys[iPart] != -1 ) { + if ( particles->cell_keys[iPart] >= 0 ) { //First reduction of the count sort algorithm. Lost particles are not included. 
                     for( int i = 0 ; i<( int )nDim_field; i++ ) {
                         particles->cell_keys[iPart] *= length_[i];
diff --git a/src/Species/SpeciesVAdaptiveMixedSort.cpp b/src/Species/SpeciesVAdaptiveMixedSort.cpp
index cc809d8c3..1889f0cd8 100755
--- a/src/Species/SpeciesVAdaptiveMixedSort.cpp
+++ b/src/Species/SpeciesVAdaptiveMixedSort.cpp
@@ -46,7 +46,7 @@ using namespace std;
 SpeciesVAdaptiveMixedSort::SpeciesVAdaptiveMixedSort( Params &params, Patch *patch ) :
     SpeciesV( params, patch )
 {
-    initCluster( params );
+    initCluster( params, patch );
     npack_ = 0 ;
     packsize_ = 0;
diff --git a/src/Tools/Pragma.h b/src/Tools/Pragma.h
index b1a81cdae..0fb5e1e9d 100644
--- a/src/Tools/Pragma.h
+++ b/src/Tools/Pragma.h
@@ -31,7 +31,7 @@
 #if defined ( SMILEI_ACCELERATOR_GPU_OMP )
     #define ATOMIC(mode) \
         _Pragma( TOSTRING(omp atomic mode))
-#elif defined ( SMILEI_OPENACC_MODE )
+#elif defined ( SMILEI_ACCELERATOR_GPU_OACC )
     #define ATOMIC(mode) \
         _Pragma( TOSTRING(acc atomic mode))
 #endif
diff --git a/src/Tools/Timers.cpp b/src/Tools/Timers.cpp
index 0cd6dac0c..d3edda0e4 100755
--- a/src/Tools/Timers.cpp
+++ b/src/Tools/Timers.cpp
@@ -18,7 +18,7 @@ Timers::Timers( SmileiMPI *smpi ) :
     collisions( "Collisions" ), // Call to Collisions methods
     movWindow( "Mov window" ), // Moving Window
     loadBal( "Load balancing" ), // Load balancing
-    syncPart( "Sync Particles" ), // Call exchangeParticles (MPI & Patch sync)
+    syncPart( "Sync Particles" ), // Call initExchParticles (MPI & Patch sync)
     syncField( "Sync Fields" ), // Call sumRhoJ(s), exchangeB (MPI & Patch sync)
     syncDens( "Sync Densities" ), // If necessary the following timers can be reintroduced
     particleMerging( "Part Merging" ), // Particle merging
diff --git a/src/Tools/gpu.cpp b/src/Tools/gpu.cpp
index 7ce000e03..497786096 100644
--- a/src/Tools/gpu.cpp
+++ b/src/Tools/gpu.cpp
@@ -1,6 +1,6 @@
 #include "gpu.h"
 
-#if defined( SMILEI_ACCELERATOR_GPU_OMP ) && defined( SMILEI_OPENACC_MODE )
+#if defined( SMILEI_ACCELERATOR_GPU_OMP ) && defined( SMILEI_ACCELERATOR_GPU_OACC )
 #error "You can not enable both OpenACC and OpenMP GPU support"
 #endif
@@ -29,7 +29,7 @@
     #else
         #error "Asking for OpenMP support without enabling compiler support for OpenMP"
     #endif
-#elif defined( SMILEI_OPENACC_MODE )
+#elif defined( SMILEI_ACCELERATOR_GPU_OACC )
     #if defined( _OPENACC )
         #include
     #else
@@ -46,11 +46,12 @@ namespace smilei {
 #if defined( SMILEI_ACCELERATOR_GPU_OMP )
     #pragma omp target enter data map( alloc \
                                        : byte_array [0:a_count * an_object_size] )
-#elif defined( SMILEI_OPENACC_MODE )
+#elif defined( SMILEI_ACCELERATOR_GPU_OACC )
     #pragma acc enter data create( byte_array [0:a_count * an_object_size] )
 #else
     SMILEI_UNUSED( a_host_pointer );
     SMILEI_UNUSED( a_count );
+    SMILEI_UNUSED( an_object_size );
     SMILEI_UNUSED( byte_array );
 #endif
 }
@@ -61,11 +62,12 @@ namespace smilei {
 #if defined( SMILEI_ACCELERATOR_GPU_OMP )
     #pragma omp target enter data map( to \
                                        : byte_array [0:a_count * an_object_size] )
-#elif defined( SMILEI_OPENACC_MODE )
+#elif defined( SMILEI_ACCELERATOR_GPU_OACC )
     #pragma acc enter data copyin( byte_array [0:a_count * an_object_size] )
 #else
     SMILEI_UNUSED( a_host_pointer );
     SMILEI_UNUSED( a_count );
+    SMILEI_UNUSED( an_object_size );
     SMILEI_UNUSED( byte_array );
 #endif
 }
@@ -75,11 +77,12 @@ namespace smilei {
     const unsigned char* byte_array = static_cast( a_host_pointer );
 #if defined( SMILEI_ACCELERATOR_GPU_OMP )
     #pragma omp target update to( byte_array [0:a_count * an_object_size] )
-#elif defined( SMILEI_OPENACC_MODE )
+#elif defined( SMILEI_ACCELERATOR_GPU_OACC )
     #pragma acc update device( byte_array [0:a_count * an_object_size] )
 #else
     SMILEI_UNUSED( a_host_pointer );
     SMILEI_UNUSED( a_count );
+    SMILEI_UNUSED( an_object_size );
     SMILEI_UNUSED( byte_array );
 #endif
 }
@@ -89,11 +92,12 @@ namespace smilei {
     unsigned char* byte_array = static_cast( a_host_pointer );
 #if defined( SMILEI_ACCELERATOR_GPU_OMP )
     #pragma omp target update from( byte_array [0:a_count * an_object_size] )
-#elif defined( SMILEI_OPENACC_MODE )
+#elif defined( SMILEI_ACCELERATOR_GPU_OACC )
     #pragma acc update host( byte_array [0:a_count * an_object_size] )
 #else
     SMILEI_UNUSED( a_host_pointer );
     SMILEI_UNUSED( a_count );
+    SMILEI_UNUSED( an_object_size );
     SMILEI_UNUSED( byte_array );
 #endif
 }
@@ -104,11 +108,12 @@ namespace smilei {
 #if defined( SMILEI_ACCELERATOR_GPU_OMP )
     #pragma omp target exit data map( from \
                                       : byte_array [0:a_count * an_object_size] )
-#elif defined( SMILEI_OPENACC_MODE )
+#elif defined( SMILEI_ACCELERATOR_GPU_OACC )
     #pragma acc exit data copyout( byte_array [0:a_count * an_object_size] )
 #else
     SMILEI_UNUSED( a_host_pointer );
     SMILEI_UNUSED( a_count );
+    SMILEI_UNUSED( an_object_size );
     SMILEI_UNUSED( byte_array );
 #endif
 }
@@ -119,11 +124,12 @@ namespace smilei {
 #if defined( SMILEI_ACCELERATOR_GPU_OMP )
     #pragma omp target exit data map( delete \
                                       : byte_array [0:a_count * an_object_size] )
-#elif defined( SMILEI_OPENACC_MODE )
+#elif defined( SMILEI_ACCELERATOR_GPU_OACC )
     #pragma acc exit data delete( byte_array [0:a_count * an_object_size] )
 #else
     SMILEI_UNUSED( a_host_pointer );
     SMILEI_UNUSED( a_count );
+    SMILEI_UNUSED( an_object_size );
     SMILEI_UNUSED( byte_array );
 #endif
 }
@@ -154,7 +160,7 @@ namespace smilei {
     SMILEI_ASSERT( a_device_pointer != nullptr );
     return const_cast( a_device_pointer );
-#elif defined( SMILEI_OPENACC_MODE )
+#elif defined( SMILEI_ACCELERATOR_GPU_OACC )
     //return const_cast( ::acc_deviceptr( a_host_pointer ) );
     return ::acc_deviceptr( const_cast(a_host_pointer) ) ;
 #else
@@ -171,7 +177,7 @@ namespace smilei {
                             a_count * an_object_size, 0, 0, device_num, device_num ) != 0 ) {
         ERROR( "omp_target_memcpy failed" );
     }
-#elif defined( SMILEI_OPENACC_MODE )
+#elif defined( SMILEI_ACCELERATOR_GPU_OACC )
     // It seems that the interface of ::acc_memcpy_device does not accept ptr to array of const type !
     // https://www.openacc.org/sites/default/files/inline-files/OpenACC.2.7.pdf
     // void acc_memcpy_device( d_void* dest, d_void* src, size_t bytes );
diff --git a/src/Tools/gpu.h b/src/Tools/gpu.h
index 28a8c98da..bb8e6c472 100644
--- a/src/Tools/gpu.h
+++ b/src/Tools/gpu.h
@@ -19,10 +19,14 @@ namespace smilei {
     #define SMILEI_ACCELERATOR_DECLARE_ROUTINE _Pragma( "omp declare target" )
     #define SMILEI_ACCELERATOR_DECLARE_ROUTINE_END _Pragma( "omp end declare target" )
     #define SMILEI_ACCELERATOR_ATOMIC _Pragma( "omp atomic update" )
-#elif defined( SMILEI_OPENACC_MODE )
+    #define SMILEI_ACCELERATOR_ASYNC_POLYCY thrust::hip::par_nosync
+    #define SMILEI_ACCELERATOR_DEVICE_SYNC() hipDeviceSynchronize()
+#elif defined( SMILEI_ACCELERATOR_GPU_OACC )
     #define SMILEI_ACCELERATOR_DECLARE_ROUTINE _Pragma( "acc routine seq" )
     #define SMILEI_ACCELERATOR_DECLARE_ROUTINE_END
     #define SMILEI_ACCELERATOR_ATOMIC _Pragma( "acc atomic" )
+    #define SMILEI_ACCELERATOR_ASYNC_POLYCY thrust::cuda::par_nosync
+    #define SMILEI_ACCELERATOR_DEVICE_SYNC() cudaDeviceSynchronize()
 #else
     #define SMILEI_ACCELERATOR_DECLARE_ROUTINE
     #define SMILEI_ACCELERATOR_DECLARE_ROUTINE_END
diff --git a/src/Tools/gpuRandom.h b/src/Tools/gpuRandom.h
index 916a7b8f8..bdb9aca59 100644
--- a/src/Tools/gpuRandom.h
+++ b/src/Tools/gpuRandom.h
@@ -1,7 +1,7 @@
 #ifndef GPU_RANDOM
 #define GPU_RANDOM
 
-#if defined( SMILEI_OPENACC_MODE )
+#if defined( SMILEI_ACCELERATOR_GPU_OACC )
     // #include
     #include "curand_kernel.h"
 #elif defined( SMILEI_ACCELERATOR_GPU_OMP )
@@ -29,7 +29,7 @@ namespace smilei {
     {
     protected:
         using State =
-#if defined( SMILEI_OPENACC_MODE )
+#if defined( SMILEI_ACCELERATOR_GPU_OACC )
             ::curandState_t;
 #elif defined( SMILEI_ACCELERATOR_GPU_OMP )
             // TODO
@@ -42,7 +42,7 @@
     public:
         Random()
-#if defined( SMILEI_OPENACC_MODE )
+#if defined( SMILEI_ACCELERATOR_GPU_OACC )
 #elif defined( SMILEI_ACCELERATOR_GPU_OMP )
             : a_state_{ 0xDEADBEEFU }
 #else
@@ -53,26 +53,36 @@
         }
 
         // Initialization
+#if defined( SMILEI_ACCELERATOR_GPU_OACC )
         void init( unsigned long long seed,
                    unsigned long long seq,
                    unsigned long long offset )
         {
-#if defined( SMILEI_OPENACC_MODE )
             // Cuda generator initialization
             ::curand_init( seed, seq, offset, &a_state_ );
+        }
 #elif defined( SMILEI_ACCELERATOR_GPU_OMP )
+        void init( unsigned long long seed,
+                   unsigned long long ,
+                   unsigned long long )
+        {
             // Hip generator initialization
             // ::hiprand_init( seed, seq, offset, &state );
             a_state_ = State{ static_cast( seed ) };
+        }
 #else
+        void init( unsigned long long seed,
+                   unsigned long long ,
+                   unsigned long long )
+        {
             a_state_ = State{ static_cast( seed ) };
-#endif
         }
+#endif
 
         // Initialization
         double uniform()
         {
-#if defined( SMILEI_OPENACC_MODE )
+#if defined( SMILEI_ACCELERATOR_GPU_OACC )
             return ::curand_uniform( &a_state_ );
 #elif defined( SMILEI_ACCELERATOR_GPU_OMP )
             // TODO
diff --git a/src/Tools/userFunctions.h b/src/Tools/userFunctions.h
index 63753fb20..d9525723d 100755
--- a/src/Tools/userFunctions.h
+++ b/src/Tools/userFunctions.h
@@ -1,5 +1,5 @@
 
-#ifdef SMILEI_OPENACC_MODE
+#ifdef SMILEI_ACCELERATOR_GPU_OACC
     #include
 #endif
 
@@ -36,7 +36,7 @@ class userFunctions
     //! \param array array in which to find the value
     //! \param elem element to be found
     //! \param nb_elem number of elements
-#ifdef SMILEI_OPENACC_MODE
+#ifdef SMILEI_ACCELERATOR_GPU_OACC
     #pragma acc routine seq
 #endif
     template
diff --git a/validation/analyses/validate_tst2d_v_o2_qed_cascade_vranic_cartesian.py b/validation/analyses/validate_tst2d_v_o2_qed_cascade_vranic_cartesian.py
index ee807d65b..8d5b8ddb1 100644
--- a/validation/analyses/validate_tst2d_v_o2_qed_cascade_vranic_cartesian.py
+++ b/validation/analyses/validate_tst2d_v_o2_qed_cascade_vranic_cartesian.py
@@ -247,7 +247,7 @@ def adaptive_error(values, statistics, thresholds):
 
 thresholds = {}
 thresholds["points"] = np.array([0. ,10 ,100,1000])
-thresholds["factor"] = np.array([1e9, 1.,0.5, 0.2])
+thresholds["factor"] = np.array([1e9, 1.,0.7, 0.2])
 
 Validate("Average gamma for the electrons vs time", average_gamma["electron"], adaptive_error(average_gamma["electron"], Nelectron, thresholds))
 Validate("Average gamma for the positrons vs time", average_gamma["positron"], adaptive_error(average_gamma["positron"], Npositron, thresholds))
diff --git a/validation/references/tst2d_04_laser_wake.py.txt b/validation/references/tst2d_04_laser_wake.py.txt
index 48d9eaeca..094e7c366 100755
Binary files a/validation/references/tst2d_04_laser_wake.py.txt and b/validation/references/tst2d_04_laser_wake.py.txt differ