Skip to content

Commit

Permalink
Disable host device for macros for SYCL/DPC++ (AMReX-Codes#2969)
Browse files Browse the repository at this point in the history
The host part of the AMREX_HOST_DEVICE_FOR_* macros is disabled for
SYCL/DPC++ because it is very slow to compile; taking the host path now aborts at run time.
  • Loading branch information
WeiqunZhang authored Oct 1, 2022
1 parent 62379fb commit 13aa4df
Show file tree
Hide file tree
Showing 4 changed files with 216 additions and 0 deletions.
107 changes: 107 additions & 0 deletions Src/Base/AMReX_GpuLaunch.H
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,8 @@ namespace Gpu {

#ifdef AMREX_USE_GPU

#ifndef AMREX_USE_DPCPP

#define AMREX_HOST_DEVICE_PARALLEL_FOR_1D_FLAG(where_to_run,n,i,block) \
{ using amrex_i_inttype = typename std::remove_const<decltype(n)>::type; \
if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
Expand Down Expand Up @@ -366,6 +368,111 @@ namespace Gpu {
block3; \
}

#else
// xxxxx DPCPP todo: the host branch of the HOST_DEVICE macros is disabled here (it aborts at run time) because it takes too long to compile

// SYCL/DPC++ variant of AMREX_HOST_DEVICE_PARALLEL_FOR_1D_FLAG.
//
// Arguments:
//   where_to_run  expression compared against RunOn::Device
//   n             iteration count handed to amrex::ParallelFor; its declared type
//                 with const removed becomes the type of the loop index
//   i             name of the loop index visible inside `block`
//   block         braced statement used as the body of the device lambda
//
// When where_to_run equals RunOn::Device and Gpu::inLaunchRegion() is true, `block`
// is wrapped in a by-value-capturing lambda and passed to amrex::ParallelFor.
// In every other case amrex::Abort is called: the host branch is deliberately not
// generated in this configuration.
//
// `where_to_run` is parenthesized so that an argument containing operators that
// bind less tightly than `==` (e.g. a conditional expression) is compared as a whole.
#define AMREX_HOST_DEVICE_PARALLEL_FOR_1D_FLAG(where_to_run,n,i,block) \
{ \
    using amrex_i_inttype = typename std::remove_const<decltype(n)>::type; \
    if (((where_to_run) == RunOn::Device) && (Gpu::inLaunchRegion())) \
    { \
        amrex::ParallelFor(n, [=] AMREX_GPU_DEVICE (amrex_i_inttype i) noexcept \
            block \
        ); \
    } \
    else { \
        amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \
    } \
}

// SYCL/DPC++ variant of AMREX_HOST_DEVICE_PARALLEL_FOR_3D_FLAG.
//
// Arguments:
//   where_to_run  expression compared against RunOn::Device
//   box           first argument handed to amrex::ParallelFor
//   i, j, k       names of the three int indices visible inside `block`
//   block         braced statement used as the body of the device lambda
//
// When where_to_run equals RunOn::Device and Gpu::inLaunchRegion() is true, `block`
// is wrapped in a by-value-capturing lambda and passed to amrex::ParallelFor.
// In every other case amrex::Abort is called: the host branch is deliberately not
// generated in this configuration.
//
// `where_to_run` is parenthesized so that an argument containing operators that
// bind less tightly than `==` (e.g. a conditional expression) is compared as a whole.
#define AMREX_HOST_DEVICE_PARALLEL_FOR_3D_FLAG(where_to_run,box,i,j,k,block) \
    if (((where_to_run) == RunOn::Device) && (Gpu::inLaunchRegion())) \
    { \
        amrex::ParallelFor(box, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept \
            block \
        ); \
    } \
    else { \
        amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \
    }

// SYCL/DPC++ variant of AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG.
//
// Arguments:
//   where_to_run  expression compared against RunOn::Device
//   box, nc       first two arguments handed to amrex::ParallelFor
//   i, j, k, n    names of the four int indices visible inside `block`
//   block         braced statement used as the body of the device lambda
//
// When where_to_run equals RunOn::Device and Gpu::inLaunchRegion() is true, `block`
// is wrapped in a by-value-capturing lambda and passed to amrex::ParallelFor.
// In every other case amrex::Abort is called: the host branch is deliberately not
// generated in this configuration.
//
// `where_to_run` is parenthesized so that an argument containing operators that
// bind less tightly than `==` (e.g. a conditional expression) is compared as a whole.
#define AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(where_to_run,box,nc,i,j,k,n,block) \
    if (((where_to_run) == RunOn::Device) && (Gpu::inLaunchRegion())) \
    { \
        amrex::ParallelFor(box, nc, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept \
            block \
        ); \
    } \
    else { \
        amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \
    }

// SYCL/DPC++ variant of AMREX_HOST_DEVICE_FOR_1D_FLAG.
//
// Arguments:
//   where_to_run  expression compared against RunOn::Device
//   n             iteration count handed to amrex::ParallelFor; its declared type
//                 with const removed becomes the type of the loop index
//   i             name of the loop index visible inside `block`
//   block         braced statement used as the body of the device lambda
//
// When where_to_run equals RunOn::Device and Gpu::inLaunchRegion() is true, `block`
// is wrapped in a by-value-capturing lambda and passed to amrex::ParallelFor.
// In every other case amrex::Abort is called: the host branch is deliberately not
// generated in this configuration.
//
// `where_to_run` is parenthesized so that an argument containing operators that
// bind less tightly than `==` (e.g. a conditional expression) is compared as a whole.
#define AMREX_HOST_DEVICE_FOR_1D_FLAG(where_to_run,n,i,block) \
{ \
    using amrex_i_inttype = typename std::remove_const<decltype(n)>::type; \
    if (((where_to_run) == RunOn::Device) && (Gpu::inLaunchRegion())) \
    { \
        amrex::ParallelFor(n, [=] AMREX_GPU_DEVICE (amrex_i_inttype i) noexcept \
            block \
        ); \
    } \
    else { \
        amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \
    } \
}

// SYCL/DPC++ variant of AMREX_HOST_DEVICE_FOR_3D_FLAG.
//
// Arguments:
//   where_to_run  expression compared against RunOn::Device
//   box           first argument handed to amrex::ParallelFor
//   i, j, k       names of the three int indices visible inside `block`
//   block         braced statement used as the body of the device lambda
//
// When where_to_run equals RunOn::Device and Gpu::inLaunchRegion() is true, `block`
// is wrapped in a by-value-capturing lambda and passed to amrex::ParallelFor.
// In every other case amrex::Abort is called: the host branch is deliberately not
// generated in this configuration.
//
// `where_to_run` is parenthesized so that an argument containing operators that
// bind less tightly than `==` (e.g. a conditional expression) is compared as a whole.
#define AMREX_HOST_DEVICE_FOR_3D_FLAG(where_to_run,box,i,j,k,block) \
    if (((where_to_run) == RunOn::Device) && (Gpu::inLaunchRegion())) \
    { \
        amrex::ParallelFor(box, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept \
            block \
        ); \
    } \
    else { \
        amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \
    }

// SYCL/DPC++ variant of AMREX_HOST_DEVICE_FOR_4D_FLAG.
//
// Arguments:
//   where_to_run  expression compared against RunOn::Device
//   box, nc       first two arguments handed to amrex::ParallelFor
//   i, j, k, n    names of the four int indices visible inside `block`
//   block         braced statement used as the body of the device lambda
//
// When where_to_run equals RunOn::Device and Gpu::inLaunchRegion() is true, `block`
// is wrapped in a by-value-capturing lambda and passed to amrex::ParallelFor.
// In every other case amrex::Abort is called: the host branch is deliberately not
// generated in this configuration.
//
// `where_to_run` is parenthesized so that an argument containing operators that
// bind less tightly than `==` (e.g. a conditional expression) is compared as a whole.
#define AMREX_HOST_DEVICE_FOR_4D_FLAG(where_to_run,box,nc,i,j,k,n,block) \
    if (((where_to_run) == RunOn::Device) && (Gpu::inLaunchRegion())) \
    { \
        amrex::ParallelFor(box, nc, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept \
            block \
        ); \
    } \
    else { \
        amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \
    }

// SYCL/DPC++ variant of AMREX_LAUNCH_HOST_DEVICE_LAMBDA_FLAG.
//
// Arguments:
//   where_to_run      expression compared against RunOn::Device
//   box, tbox, block  forwarded unchanged to AMREX_LAUNCH_DEVICE_LAMBDA
//
// When where_to_run equals RunOn::Device and Gpu::inLaunchRegion() is true, the
// launch is delegated to AMREX_LAUNCH_DEVICE_LAMBDA. In every other case amrex::Abort
// is called: the host branch is deliberately not generated in this configuration.
//
// `where_to_run` is parenthesized so that an argument containing operators that
// bind less tightly than `==` (e.g. a conditional expression) is compared as a whole.
#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_FLAG(where_to_run,box,tbox,block) \
    if (((where_to_run) == RunOn::Device) && (Gpu::inLaunchRegion())) \
    { \
        AMREX_LAUNCH_DEVICE_LAMBDA(box,tbox,block); \
    } else { \
        amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \
    }

// SYCL/DPC++ variant of AMREX_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_FLAG.
//
// Arguments:
//   where_to_run       expression compared against RunOn::Device
//   bx1, tbx1, block1  forwarded unchanged to AMREX_LAUNCH_DEVICE_LAMBDA
//
// When where_to_run equals RunOn::Device and Gpu::inLaunchRegion() is true, the
// launch is delegated to AMREX_LAUNCH_DEVICE_LAMBDA. In every other case amrex::Abort
// is called: the host branch is deliberately not generated in this configuration.
//
// `where_to_run` is parenthesized so that an argument containing operators that
// bind less tightly than `==` (e.g. a conditional expression) is compared as a whole.
#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_FLAG(where_to_run,bx1,tbx1,block1) \
    if (((where_to_run) == RunOn::Device) && (Gpu::inLaunchRegion())) \
    { \
        AMREX_LAUNCH_DEVICE_LAMBDA(bx1,tbx1,block1); \
    } else { \
        amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \
    }

// SYCL/DPC++ variant of AMREX_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_2_FLAG.
//
// Arguments:
//   where_to_run       expression compared against RunOn::Device
//   bx1, tbx1, block1  first triple forwarded unchanged to AMREX_LAUNCH_DEVICE_LAMBDA
//   bx2, tbx2, block2  second triple forwarded unchanged in the same call
//
// When where_to_run equals RunOn::Device and Gpu::inLaunchRegion() is true, both
// triples are handed to a single AMREX_LAUNCH_DEVICE_LAMBDA invocation. In every
// other case amrex::Abort is called: the host branch is deliberately not generated
// in this configuration.
//
// `where_to_run` is parenthesized so that an argument containing operators that
// bind less tightly than `==` (e.g. a conditional expression) is compared as a whole.
#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_2_FLAG(where_to_run,bx1,tbx1,block1,bx2,tbx2,block2) \
    if (((where_to_run) == RunOn::Device) && (Gpu::inLaunchRegion())) \
    { \
        AMREX_LAUNCH_DEVICE_LAMBDA(bx1,tbx1,block1,bx2,tbx2,block2); \
    } else { \
        amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \
    }

// SYCL/DPC++ variant of AMREX_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_3_FLAG.
//
// Arguments:
//   where_to_run       expression compared against RunOn::Device
//   bx1, tbx1, block1  first triple forwarded unchanged to AMREX_LAUNCH_DEVICE_LAMBDA
//   bx2, tbx2, block2  second triple forwarded unchanged in the same call
//   bx3, tbx3, block3  third triple forwarded unchanged in the same call
//
// When where_to_run equals RunOn::Device and Gpu::inLaunchRegion() is true, all
// three triples are handed to a single AMREX_LAUNCH_DEVICE_LAMBDA invocation. In
// every other case amrex::Abort is called: the host branch is deliberately not
// generated in this configuration.
//
// `where_to_run` is parenthesized so that an argument containing operators that
// bind less tightly than `==` (e.g. a conditional expression) is compared as a whole.
#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_3_FLAG(where_to_run,bx1,tbx1,block1,bx2,tbx2,block2,bx3,tbx3,block3) \
    if (((where_to_run) == RunOn::Device) && (Gpu::inLaunchRegion())) \
    { \
        AMREX_LAUNCH_DEVICE_LAMBDA(bx1,tbx1,block1,bx2,tbx2,block2,bx3,tbx3,block3); \
    } else { \
        amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \
    }

#endif

#else

#define AMREX_HOST_DEVICE_PARALLEL_FOR_1D_FLAG(where_to_run,n,i,block) \
Expand Down
52 changes: 52 additions & 0 deletions Src/Base/AMReX_GpuLaunchFunctsG.H
Original file line number Diff line number Diff line change
Expand Up @@ -1629,8 +1629,12 @@ HostDeviceParallelFor (Gpu::KernelInfo const& info, T n, L&& f) noexcept
if (Gpu::inLaunchRegion()) {
ParallelFor<AMREX_GPU_MAX_THREADS>(info,n,std::forward<L>(f));
} else {
#ifdef AMREX_USE_DPCPP
amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile");
#else
AMREX_PRAGMA_SIMD
for (T i = 0; i < n; ++i) f(i);
#endif
}
}

Expand All @@ -1641,8 +1645,12 @@ HostDeviceParallelFor (Gpu::KernelInfo const& info, T n, L&& f) noexcept
if (Gpu::inLaunchRegion()) {
ParallelFor<MT>(info,n,std::forward<L>(f));
} else {
#ifdef AMREX_USE_DPCPP
amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile");
#else
AMREX_PRAGMA_SIMD
for (T i = 0; i < n; ++i) f(i);
#endif
}
}

Expand All @@ -1667,7 +1675,11 @@ HostDeviceParallelFor (Gpu::KernelInfo const& info, Box const& box, L&& f) noexc
if (Gpu::inLaunchRegion()) {
ParallelFor<AMREX_GPU_MAX_THREADS>(info, box,std::forward<L>(f));
} else {
#ifdef AMREX_USE_DPCPP
amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile");
#else
LoopConcurrentOnCpu(box,std::forward<L>(f));
#endif
}
}

Expand All @@ -1678,7 +1690,11 @@ HostDeviceParallelFor (Gpu::KernelInfo const& info, Box const& box, L&& f) noexc
if (Gpu::inLaunchRegion()) {
ParallelFor<MT>(info, box,std::forward<L>(f));
} else {
#ifdef AMREX_USE_DPCPP
amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile");
#else
LoopConcurrentOnCpu(box,std::forward<L>(f));
#endif
}
}

Expand All @@ -1689,7 +1705,11 @@ HostDeviceParallelFor (Gpu::KernelInfo const& info, Box const& box, T ncomp, L&&
if (Gpu::inLaunchRegion()) {
ParallelFor<AMREX_GPU_MAX_THREADS>(info, box,ncomp,std::forward<L>(f));
} else {
#ifdef AMREX_USE_DPCPP
amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile");
#else
LoopConcurrentOnCpu(box,ncomp,std::forward<L>(f));
#endif
}
}

Expand All @@ -1700,7 +1720,11 @@ HostDeviceParallelFor (Gpu::KernelInfo const& info, Box const& box, T ncomp, L&&
if (Gpu::inLaunchRegion()) {
ParallelFor<MT>(info, box,ncomp,std::forward<L>(f));
} else {
#ifdef AMREX_USE_DPCPP
amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile");
#else
LoopConcurrentOnCpu(box,ncomp,std::forward<L>(f));
#endif
}
}

Expand All @@ -1712,8 +1736,12 @@ HostDeviceParallelFor (Gpu::KernelInfo const& info,
if (Gpu::inLaunchRegion()) {
ParallelFor<AMREX_GPU_MAX_THREADS>(info,box1,box2,std::forward<L1>(f1),std::forward<L2>(f2));
} else {
#ifdef AMREX_USE_DPCPP
amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile");
#else
LoopConcurrentOnCpu(box1,std::forward<L1>(f1));
LoopConcurrentOnCpu(box2,std::forward<L2>(f2));
#endif
}
}

Expand All @@ -1725,8 +1753,12 @@ HostDeviceParallelFor (Gpu::KernelInfo const& info,
if (Gpu::inLaunchRegion()) {
ParallelFor<MT>(info,box1,box2,std::forward<L1>(f1),std::forward<L2>(f2));
} else {
#ifdef AMREX_USE_DPCPP
amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile");
#else
LoopConcurrentOnCpu(box1,std::forward<L1>(f1));
LoopConcurrentOnCpu(box2,std::forward<L2>(f2));
#endif
}
}

Expand All @@ -1740,9 +1772,13 @@ HostDeviceParallelFor (Gpu::KernelInfo const& info,
ParallelFor<MT>(info,box1,box2,box3,
std::forward<L1>(f1),std::forward<L2>(f2),std::forward<L3>(f3));
} else {
#ifdef AMREX_USE_DPCPP
amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile");
#else
LoopConcurrentOnCpu(box1,std::forward<L1>(f1));
LoopConcurrentOnCpu(box2,std::forward<L2>(f2));
LoopConcurrentOnCpu(box3,std::forward<L3>(f3));
#endif
}
}

Expand All @@ -1757,8 +1793,12 @@ HostDeviceParallelFor (Gpu::KernelInfo const& info,
if (Gpu::inLaunchRegion()) {
ParallelFor<AMREX_GPU_MAX_THREADS>(info,box1,ncomp1,std::forward<L1>(f1),box2,ncomp2,std::forward<L2>(f2));
} else {
#ifdef AMREX_USE_DPCPP
amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile");
#else
LoopConcurrentOnCpu(box1,ncomp1,std::forward<L1>(f1));
LoopConcurrentOnCpu(box2,ncomp2,std::forward<L2>(f2));
#endif
}
}

Expand All @@ -1773,8 +1813,12 @@ HostDeviceParallelFor (Gpu::KernelInfo const& info,
if (Gpu::inLaunchRegion()) {
ParallelFor<MT>(info,box1,ncomp1,std::forward<L1>(f1),box2,ncomp2,std::forward<L2>(f2));
} else {
#ifdef AMREX_USE_DPCPP
amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile");
#else
LoopConcurrentOnCpu(box1,ncomp1,std::forward<L1>(f1));
LoopConcurrentOnCpu(box2,ncomp2,std::forward<L2>(f2));
#endif
}
}

Expand All @@ -1794,9 +1838,13 @@ HostDeviceParallelFor (Gpu::KernelInfo const& info,
box2,ncomp2,std::forward<L2>(f2),
box3,ncomp3,std::forward<L3>(f3));
} else {
#ifdef AMREX_USE_DPCPP
amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile");
#else
LoopConcurrentOnCpu(box1,ncomp1,std::forward<L1>(f1));
LoopConcurrentOnCpu(box2,ncomp2,std::forward<L2>(f2));
LoopConcurrentOnCpu(box3,ncomp3,std::forward<L3>(f3));
#endif
}
}

Expand All @@ -1816,9 +1864,13 @@ HostDeviceParallelFor (Gpu::KernelInfo const& info,
box2,ncomp2,std::forward<L2>(f2),
box3,ncomp3,std::forward<L3>(f3));
} else {
#ifdef AMREX_USE_DPCPP
amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile");
#else
LoopConcurrentOnCpu(box1,ncomp1,std::forward<L1>(f1));
LoopConcurrentOnCpu(box2,ncomp2,std::forward<L2>(f2));
LoopConcurrentOnCpu(box3,ncomp3,std::forward<L3>(f3));
#endif
}
}

Expand Down
Loading

0 comments on commit 13aa4df

Please sign in to comment.