From 05f15bfd598e1147eaf234139ae9bf1053c5abba Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Thu, 4 Apr 2024 00:54:04 +0200 Subject: [PATCH 1/5] Fix names in the ROCm packages documentation --- HeterogeneousTest/ROCmDevice/README.md | 8 ++++---- HeterogeneousTest/ROCmKernel/README.md | 10 +++++----- HeterogeneousTest/ROCmOpaque/README.md | 6 +++--- HeterogeneousTest/ROCmWrapper/README.md | 6 +++--- 4 files changed, 15 insertions(+), 15 deletions(-) diff --git a/HeterogeneousTest/ROCmDevice/README.md b/HeterogeneousTest/ROCmDevice/README.md index cb1142be6c3ad..e068e0efa27e2 100644 --- a/HeterogeneousTest/ROCmDevice/README.md +++ b/HeterogeneousTest/ROCmDevice/README.md @@ -12,20 +12,20 @@ ROCm-based libraries, and using them from multiple plugins. The package `HeterogeneousTest/ROCmDevice` implements a library that defines and exports ROCm device-side functions: ```c++ -namespace cms::cudatest { +namespace cms::rocmtest { __device__ void add_vectors_f(...); __device__ void add_vectors_d(...); -} // namespace cms::cudatest +} // namespace cms::rocmtest ``` The `plugins` directory implements the `ROCmTestDeviceAdditionModule` `EDAnalyzer` that launches a ROCm kernel using the functions defined in ths library. As a byproduct this plugin also shows how to split an `EDAnalyzer` or other framework plugin into a host-only part (in a `.cc` file) and a -device part (in a `.cu` file). +device part (in a `.hip.cc` file). -The `test` directory implements the `testCudaDeviceAddition` binary that launches a ROCm kernel +The `test` directory implements the `testRocmDeviceAddition` binary that launches a ROCm kernel using these functions. It also contains the `testROCmTestDeviceAdditionModule.py` python configuration to exercise the `ROCmTestDeviceAdditionModule` plugin. diff --git a/HeterogeneousTest/ROCmKernel/README.md b/HeterogeneousTest/ROCmKernel/README.md index e857d1a5477be..0e6b7688d663c 100644 --- a/HeterogeneousTest/ROCmKernel/README.md +++ b/HeterogeneousTest/ROCmKernel/README.md @@ -12,21 +12,21 @@ ROCm-based libraries, and using them from multiple plugins. The package `HeterogeneousTest/ROCmKernel` implements a library that defines and exports ROCm kernels that call the device functions defined in the `HeterogeneousTest/ROCmDevice` library: ```c++ -namespace cms::cudatest { +namespace cms::rocmtest { __global__ void kernel_add_vectors_f(...); __global__ void kernel_add_vectors_d(...); -} // namespace cms::cudatest +} // namespace cms::rocmtest ``` The `plugins` directory implements the `ROCmTestKernelAdditionModule` `EDAnalyzer` that launches the ROCm kernels defined in this library. As a byproduct this plugin also shows how to split an `EDAnalyzer` or other framework plugin into a host-only part (in a `.cc` file) and a device part (in -a `.cu` file). +a `.hip.cc` file). -The `test` directory implements the `testCudaKernelAddition` test binary that launches the ROCm kernel -defined in this library. +The `test` directory implements the `testRocmKernelAddition` test binary that launches the ROCm +kernel defined in this library. It also contains the `testROCmTestKernelAdditionModule.py` python configuration to exercise the `ROCmTestKernelAdditionModule` module. diff --git a/HeterogeneousTest/ROCmOpaque/README.md b/HeterogeneousTest/ROCmOpaque/README.md index 4da89f879e12d..c0bc02f4b46fd 100644 --- a/HeterogeneousTest/ROCmOpaque/README.md +++ b/HeterogeneousTest/ROCmOpaque/README.md @@ -12,19 +12,19 @@ ROCm-based libraries, and using them from multiple plugins. The package `HeterogeneousTest/ROCmOpaque` implements a non-ROCm aware library, with functions that call the wrappers defined in the `HeterogeneousTest/ROCmWrapper` library: ```c++ -namespace cms::cudatest { +namespace cms::rocmtest { void opaque_add_vectors_f(...); void opaque_add_vectors_d(...); -} // namespace cms::cudatest +} // namespace cms::rocmtest ``` The `plugins` directory implements the `ROCmTestOpqaueAdditionModule` `EDAnalyzer` that calls the function defined in this library. This plugin shows how the function can be used directly from a host-only, non-ROCm aware plugin. -The `test` directory implements the `testCudaDeviceAdditionOpqaue` test binary that calls the +The `test` directory implements the `testRocmDeviceAdditionOpqaue` test binary that calls the function defined in this library, and shows how they can be used directly from a host-only, non-ROCm aware application. It also contains the `testROCmTestOpqaueAdditionModule.py` python configuration to exercise the diff --git a/HeterogeneousTest/ROCmWrapper/README.md b/HeterogeneousTest/ROCmWrapper/README.md index 4ccdcea86958f..10e23326529c5 100644 --- a/HeterogeneousTest/ROCmWrapper/README.md +++ b/HeterogeneousTest/ROCmWrapper/README.md @@ -12,12 +12,12 @@ ROCm-based libraries, and using them from multiple plugins. The package `HeterogeneousTest/ROCmWrapper` implements a library that defines and exports host-side wrappers that launch the kernels defined in the `HeterogeneousTest/ROCmKernel` library: ```c++ -namespace cms::cudatest { +namespace cms::rocmtest { void wrapper_add_vectors_f(...); void wrapper_add_vectors_d(...); -} // namespace cms::cudatest +} // namespace cms::rocmtest ``` These wrappers can be used from host-only, non-ROCm aware libraries, plugins and applications. They can be linked with the standard host linker. @@ -26,7 +26,7 @@ The `plugins` directory implements the `ROCmTestWrapperAdditionModule` `EDAnalyz wrappers defined in this library. This plugin shows how the wrappers can be used directly from a host-only, non-ROCm aware plugin. -The `test` directory implements the `testCudaDeviceAdditionWrapper` test binary that calls the +The `test` directory implements the `testRocmDeviceAdditionWrapper` test binary that calls the wrappers defined in this library, and shows how they can be used directly from a host-only, non-ROCm aware application. It also contains the `testROCmTestWrapperAdditionModule.py` python configuration to exercise the From 15e21bef64ab60879a8df7cc1e86f206e46e8cd2 Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Fri, 5 Apr 2024 21:29:51 +0200 Subject: [PATCH 2/5] Fix typos in the ROCm tests --- HeterogeneousTest/ROCmOpaque/interface/DeviceAdditionOpaque.h | 4 ++-- .../ROCmOpaque/plugins/ROCmTestOpaqueAdditionModule.cc | 2 +- HeterogeneousTest/ROCmOpaque/src/DeviceAdditionOpaque.cc | 4 ++-- HeterogeneousTest/ROCmOpaque/test/BuildFile.xml | 2 +- HeterogeneousTest/ROCmOpaque/test/testDeviceAdditionOpaque.cc | 2 +- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/HeterogeneousTest/ROCmOpaque/interface/DeviceAdditionOpaque.h b/HeterogeneousTest/ROCmOpaque/interface/DeviceAdditionOpaque.h index 9d4a314bd17c9..5b3b9a9a1b9bb 100644 --- a/HeterogeneousTest/ROCmOpaque/interface/DeviceAdditionOpaque.h +++ b/HeterogeneousTest/ROCmOpaque/interface/DeviceAdditionOpaque.h @@ -5,9 +5,9 @@ namespace cms::rocmtest { - void opqaue_add_vectors_f(const float* in1, const float* in2, float* out, size_t size); + void opaque_add_vectors_f(const float* in1, const float* in2, float* out, size_t size); - void opqaue_add_vectors_d(const double* in1, const double* in2, double* out, size_t size); + void opaque_add_vectors_d(const double* in1, const double* in2, double* out, size_t size); } // namespace cms::rocmtest diff --git a/HeterogeneousTest/ROCmOpaque/plugins/ROCmTestOpaqueAdditionModule.cc b/HeterogeneousTest/ROCmOpaque/plugins/ROCmTestOpaqueAdditionModule.cc index e3315fa0ff0e4..89c8a4fc8fdc4 100644 --- a/HeterogeneousTest/ROCmOpaque/plugins/ROCmTestOpaqueAdditionModule.cc +++ b/HeterogeneousTest/ROCmOpaque/plugins/ROCmTestOpaqueAdditionModule.cc @@ -65,7 +65,7 @@ void ROCmTestOpaqueAdditionModule::analyze(edm::StreamID, edm::Event const& even } // launch the 1-dimensional kernel for vector addition - cms::rocmtest::opqaue_add_vectors_f(in1.data(), in2.data(), out.data(), size_); + cms::rocmtest::opaque_add_vectors_f(in1.data(), in2.data(), out.data(), size_); // check the results for (size_t i = 0; i < size_; ++i) { diff --git a/HeterogeneousTest/ROCmOpaque/src/DeviceAdditionOpaque.cc b/HeterogeneousTest/ROCmOpaque/src/DeviceAdditionOpaque.cc index 3de89369df8a9..780990c63d303 100644 --- a/HeterogeneousTest/ROCmOpaque/src/DeviceAdditionOpaque.cc +++ b/HeterogeneousTest/ROCmOpaque/src/DeviceAdditionOpaque.cc @@ -8,7 +8,7 @@ namespace cms::rocmtest { - void opqaue_add_vectors_f(const float* in1_h, const float* in2_h, float* out_h, size_t size) { + void opaque_add_vectors_f(const float* in1_h, const float* in2_h, float* out_h, size_t size) { // allocate input and output buffers on the device float* in1_d; float* in2_d; @@ -39,7 +39,7 @@ namespace cms::rocmtest { hipCheck(hipFree(out_d)); } - void opqaue_add_vectors_d(const double* in1_h, const double* in2_h, double* out_h, size_t size) { + void opaque_add_vectors_d(const double* in1_h, const double* in2_h, double* out_h, size_t size) { // allocate input and output buffers on the device double* in1_d; double* in2_d; diff --git a/HeterogeneousTest/ROCmOpaque/test/BuildFile.xml b/HeterogeneousTest/ROCmOpaque/test/BuildFile.xml index a26e1a8a43b05..e284a4dcd1b33 100644 --- a/HeterogeneousTest/ROCmOpaque/test/BuildFile.xml +++ b/HeterogeneousTest/ROCmOpaque/test/BuildFile.xml @@ -7,5 +7,5 @@ - + diff --git a/HeterogeneousTest/ROCmOpaque/test/testDeviceAdditionOpaque.cc b/HeterogeneousTest/ROCmOpaque/test/testDeviceAdditionOpaque.cc index c3ea68dbce85d..cf08bb9d74447 100644 --- a/HeterogeneousTest/ROCmOpaque/test/testDeviceAdditionOpaque.cc +++ b/HeterogeneousTest/ROCmOpaque/test/testDeviceAdditionOpaque.cc @@ -37,7 +37,7 @@ TEST_CASE("HeterogeneousTest/ROCmOpaque test", "[rocmTestOpaqueAdditionOpaque]") SECTION("Test add_vectors_f") { // launch the 1-dimensional kernel for vector addition - REQUIRE_NOTHROW(cms::rocmtest::opqaue_add_vectors_f(in1.data(), in2.data(), out.data(), size)); + REQUIRE_NOTHROW(cms::rocmtest::opaque_add_vectors_f(in1.data(), in2.data(), out.data(), size)); // check the results for (size_t i = 0; i < size; ++i) { From c8ec63cfe492dc529e1f02c0a818e815978f9996 Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Thu, 4 Apr 2024 01:10:59 +0200 Subject: [PATCH 3/5] Fix line wrap --- HeterogeneousTest/CUDAKernel/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/HeterogeneousTest/CUDAKernel/README.md b/HeterogeneousTest/CUDAKernel/README.md index b09e2163ff7c3..b8eab9838f243 100644 --- a/HeterogeneousTest/CUDAKernel/README.md +++ b/HeterogeneousTest/CUDAKernel/README.md @@ -25,8 +25,8 @@ CUDA kernels defined in this library. As a byproduct this plugin also shows how `EDAnalyzer` or other framework plugin into a host-only part (in a `.cc` file) and a device part (in a `.cu` file). -The `test` directory implements the `testCudaKernelAddition` test binary that launches the CUDA kernel -defined in this library. +The `test` directory implements the `testCudaKernelAddition` test binary that launches the CUDA +kernel defined in this library. It also contains the `testCUDATestKernelAdditionModule.py` python configuration to exercise the `CUDATestKernelAdditionModule` module. From f0801461acf3539022ffcee42fc399b5169c290e Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Fri, 5 Apr 2024 09:36:53 +0200 Subject: [PATCH 4/5] Update "private" namespace names to match package names --- .../CUDADevice/plugins/CUDATestDeviceAdditionAlgo.cu | 4 ++-- .../CUDADevice/plugins/CUDATestDeviceAdditionAlgo.h | 4 ++-- .../CUDADevice/plugins/CUDATestDeviceAdditionModule.cc | 2 +- .../CUDAKernel/plugins/CUDATestKernelAdditionAlgo.cu | 4 ++-- .../CUDAKernel/plugins/CUDATestKernelAdditionAlgo.h | 4 ++-- .../CUDAKernel/plugins/CUDATestKernelAdditionModule.cc | 2 +- .../ROCmDevice/plugins/ROCmTestDeviceAdditionAlgo.h | 4 ++-- .../ROCmDevice/plugins/ROCmTestDeviceAdditionAlgo.hip.cc | 4 ++-- .../ROCmDevice/plugins/ROCmTestDeviceAdditionModule.cc | 2 +- .../ROCmKernel/plugins/ROCmTestKernelAdditionAlgo.h | 4 ++-- .../ROCmKernel/plugins/ROCmTestKernelAdditionAlgo.hip.cc | 4 ++-- .../ROCmKernel/plugins/ROCmTestKernelAdditionModule.cc | 2 +- 12 files changed, 20 insertions(+), 20 deletions(-) diff --git a/HeterogeneousTest/CUDADevice/plugins/CUDATestDeviceAdditionAlgo.cu b/HeterogeneousTest/CUDADevice/plugins/CUDATestDeviceAdditionAlgo.cu index cd3259a43cd10..483b0ab3f058a 100644 --- a/HeterogeneousTest/CUDADevice/plugins/CUDATestDeviceAdditionAlgo.cu +++ b/HeterogeneousTest/CUDADevice/plugins/CUDATestDeviceAdditionAlgo.cu @@ -7,7 +7,7 @@ #include "CUDATestDeviceAdditionAlgo.h" -namespace HeterogeneousCoreCUDATestDevicePlugins { +namespace HeterogeneousTestCUDADevicePlugins { __global__ void kernel_add_vectors_f(const float* __restrict__ in1, const float* __restrict__ in2, @@ -24,4 +24,4 @@ namespace HeterogeneousCoreCUDATestDevicePlugins { cudaCheck(cudaGetLastError()); } -} // namespace HeterogeneousCoreCUDATestDevicePlugins +} // namespace HeterogeneousTestCUDADevicePlugins diff --git a/HeterogeneousTest/CUDADevice/plugins/CUDATestDeviceAdditionAlgo.h b/HeterogeneousTest/CUDADevice/plugins/CUDATestDeviceAdditionAlgo.h index b2001c2dff117..7ee532bac2f58 100644 --- a/HeterogeneousTest/CUDADevice/plugins/CUDATestDeviceAdditionAlgo.h +++ b/HeterogeneousTest/CUDADevice/plugins/CUDATestDeviceAdditionAlgo.h @@ -3,13 +3,13 @@ #include -namespace HeterogeneousCoreCUDATestDevicePlugins { +namespace HeterogeneousTestCUDADevicePlugins { void wrapper_add_vectors_f(const float* __restrict__ in1, const float* __restrict__ in2, float* __restrict__ out, size_t size); -} // namespace HeterogeneousCoreCUDATestDevicePlugins +} // namespace HeterogeneousTestCUDADevicePlugins #endif // HeterogeneousTest_CUDADevice_plugins_CUDATestDeviceAdditionAlgo_h diff --git a/HeterogeneousTest/CUDADevice/plugins/CUDATestDeviceAdditionModule.cc b/HeterogeneousTest/CUDADevice/plugins/CUDATestDeviceAdditionModule.cc index c5d7f7ac272be..a57859d210815 100644 --- a/HeterogeneousTest/CUDADevice/plugins/CUDATestDeviceAdditionModule.cc +++ b/HeterogeneousTest/CUDADevice/plugins/CUDATestDeviceAdditionModule.cc @@ -84,7 +84,7 @@ void CUDATestDeviceAdditionModule::analyze(edm::StreamID, edm::Event const& even cudaCheck(cudaMemset(out_d, 0, size_ * sizeof(float))); // launch the 1-dimensional kernel for vector addition - HeterogeneousCoreCUDATestDevicePlugins::wrapper_add_vectors_f(in1_d, in2_d, out_d, size_); + HeterogeneousTestCUDADevicePlugins::wrapper_add_vectors_f(in1_d, in2_d, out_d, size_); // copy the results from the device to the host cudaCheck(cudaMemcpy(out_h.data(), out_d, size_ * sizeof(float), cudaMemcpyDeviceToHost)); diff --git a/HeterogeneousTest/CUDAKernel/plugins/CUDATestKernelAdditionAlgo.cu b/HeterogeneousTest/CUDAKernel/plugins/CUDATestKernelAdditionAlgo.cu index 549a5643a9e85..a1736ca1ae846 100644 --- a/HeterogeneousTest/CUDAKernel/plugins/CUDATestKernelAdditionAlgo.cu +++ b/HeterogeneousTest/CUDAKernel/plugins/CUDATestKernelAdditionAlgo.cu @@ -7,7 +7,7 @@ #include "CUDATestKernelAdditionAlgo.h" -namespace HeterogeneousCoreCUDATestKernelPlugins { +namespace HeterogeneousTestCUDAKernelPlugins { void wrapper_add_vectors_f(const float* __restrict__ in1, const float* __restrict__ in2, @@ -17,4 +17,4 @@ namespace HeterogeneousCoreCUDATestKernelPlugins { cudaCheck(cudaGetLastError()); } -} // namespace HeterogeneousCoreCUDATestKernelPlugins +} // namespace HeterogeneousTestCUDAKernelPlugins diff --git a/HeterogeneousTest/CUDAKernel/plugins/CUDATestKernelAdditionAlgo.h b/HeterogeneousTest/CUDAKernel/plugins/CUDATestKernelAdditionAlgo.h index 159b867d1a007..80433308f6c1e 100644 --- a/HeterogeneousTest/CUDAKernel/plugins/CUDATestKernelAdditionAlgo.h +++ b/HeterogeneousTest/CUDAKernel/plugins/CUDATestKernelAdditionAlgo.h @@ -3,13 +3,13 @@ #include -namespace HeterogeneousCoreCUDATestKernelPlugins { +namespace HeterogeneousTestCUDAKernelPlugins { void wrapper_add_vectors_f(const float* __restrict__ in1, const float* __restrict__ in2, float* __restrict__ out, size_t size); -} // namespace HeterogeneousCoreCUDATestKernelPlugins +} // namespace HeterogeneousTestCUDAKernelPlugins #endif // HeterogeneousTest_CUDAKernel_plugins_CUDATestKernelAdditionAlgo_h diff --git a/HeterogeneousTest/CUDAKernel/plugins/CUDATestKernelAdditionModule.cc b/HeterogeneousTest/CUDAKernel/plugins/CUDATestKernelAdditionModule.cc index 666e9acd537ca..bb18885e00b86 100644 --- a/HeterogeneousTest/CUDAKernel/plugins/CUDATestKernelAdditionModule.cc +++ b/HeterogeneousTest/CUDAKernel/plugins/CUDATestKernelAdditionModule.cc @@ -84,7 +84,7 @@ void CUDATestKernelAdditionModule::analyze(edm::StreamID, edm::Event const& even cudaCheck(cudaMemset(out_d, 0, size_ * sizeof(float))); // launch the 1-dimensional kernel for vector addition - HeterogeneousCoreCUDATestKernelPlugins::wrapper_add_vectors_f(in1_d, in2_d, out_d, size_); + HeterogeneousTestCUDAKernelPlugins::wrapper_add_vectors_f(in1_d, in2_d, out_d, size_); // copy the results from the device to the host cudaCheck(cudaMemcpy(out_h.data(), out_d, size_ * sizeof(float), cudaMemcpyDeviceToHost)); diff --git a/HeterogeneousTest/ROCmDevice/plugins/ROCmTestDeviceAdditionAlgo.h b/HeterogeneousTest/ROCmDevice/plugins/ROCmTestDeviceAdditionAlgo.h index 17a04ef5d23d0..2b12616c1b1d7 100644 --- a/HeterogeneousTest/ROCmDevice/plugins/ROCmTestDeviceAdditionAlgo.h +++ b/HeterogeneousTest/ROCmDevice/plugins/ROCmTestDeviceAdditionAlgo.h @@ -3,13 +3,13 @@ #include -namespace HeterogeneousCoreROCmTestDevicePlugins { +namespace HeterogeneousTestROCmDevicePlugins { void wrapper_add_vectors_f(const float* __restrict__ in1, const float* __restrict__ in2, float* __restrict__ out, size_t size); -} // namespace HeterogeneousCoreROCmTestDevicePlugins +} // namespace HeterogeneousTestROCmDevicePlugins #endif // HeterogeneousTest_ROCmDevice_plugins_ROCmTestDeviceAdditionAlgo_h diff --git a/HeterogeneousTest/ROCmDevice/plugins/ROCmTestDeviceAdditionAlgo.hip.cc b/HeterogeneousTest/ROCmDevice/plugins/ROCmTestDeviceAdditionAlgo.hip.cc index 3d54ecdf04e83..2c4dbb525133c 100644 --- a/HeterogeneousTest/ROCmDevice/plugins/ROCmTestDeviceAdditionAlgo.hip.cc +++ b/HeterogeneousTest/ROCmDevice/plugins/ROCmTestDeviceAdditionAlgo.hip.cc @@ -7,7 +7,7 @@ #include "ROCmTestDeviceAdditionAlgo.h" -namespace HeterogeneousCoreROCmTestDevicePlugins { +namespace HeterogeneousTestROCmDevicePlugins { __global__ void kernel_add_vectors_f(const float* __restrict__ in1, const float* __restrict__ in2, @@ -24,4 +24,4 @@ namespace HeterogeneousCoreROCmTestDevicePlugins { hipCheck(hipGetLastError()); } -} // namespace HeterogeneousCoreROCmTestDevicePlugins +} // namespace HeterogeneousTestROCmDevicePlugins diff --git a/HeterogeneousTest/ROCmDevice/plugins/ROCmTestDeviceAdditionModule.cc b/HeterogeneousTest/ROCmDevice/plugins/ROCmTestDeviceAdditionModule.cc index bf46ae35da8bf..e8050b63a3d41 100644 --- a/HeterogeneousTest/ROCmDevice/plugins/ROCmTestDeviceAdditionModule.cc +++ b/HeterogeneousTest/ROCmDevice/plugins/ROCmTestDeviceAdditionModule.cc @@ -84,7 +84,7 @@ void ROCmTestDeviceAdditionModule::analyze(edm::StreamID, edm::Event const& even hipCheck(hipMemset(out_d, 0, size_ * sizeof(float))); // launch the 1-dimensional kernel for vector addition - HeterogeneousCoreROCmTestDevicePlugins::wrapper_add_vectors_f(in1_d, in2_d, out_d, size_); + HeterogeneousTestROCmDevicePlugins::wrapper_add_vectors_f(in1_d, in2_d, out_d, size_); // copy the results from the device to the host hipCheck(hipMemcpy(out_h.data(), out_d, size_ * sizeof(float), hipMemcpyDeviceToHost)); diff --git a/HeterogeneousTest/ROCmKernel/plugins/ROCmTestKernelAdditionAlgo.h b/HeterogeneousTest/ROCmKernel/plugins/ROCmTestKernelAdditionAlgo.h index 2461fad80ff17..08417b512da0a 100644 --- a/HeterogeneousTest/ROCmKernel/plugins/ROCmTestKernelAdditionAlgo.h +++ b/HeterogeneousTest/ROCmKernel/plugins/ROCmTestKernelAdditionAlgo.h @@ -3,13 +3,13 @@ #include -namespace HeterogeneousCoreROCmTestKernelPlugins { +namespace HeterogeneousTestROCmKernelPlugins { void wrapper_add_vectors_f(const float* __restrict__ in1, const float* __restrict__ in2, float* __restrict__ out, size_t size); -} // namespace HeterogeneousCoreROCmTestKernelPlugins +} // namespace HeterogeneousTestROCmKernelPlugins #endif // HeterogeneousTest_ROCmKernel_plugins_ROCmTestKernelAdditionAlgo_h diff --git a/HeterogeneousTest/ROCmKernel/plugins/ROCmTestKernelAdditionAlgo.hip.cc b/HeterogeneousTest/ROCmKernel/plugins/ROCmTestKernelAdditionAlgo.hip.cc index 6239e70905196..8f2e12665fb55 100644 --- a/HeterogeneousTest/ROCmKernel/plugins/ROCmTestKernelAdditionAlgo.hip.cc +++ b/HeterogeneousTest/ROCmKernel/plugins/ROCmTestKernelAdditionAlgo.hip.cc @@ -7,7 +7,7 @@ #include "ROCmTestKernelAdditionAlgo.h" -namespace HeterogeneousCoreROCmTestKernelPlugins { +namespace HeterogeneousTestROCmKernelPlugins { void wrapper_add_vectors_f(const float* __restrict__ in1, const float* __restrict__ in2, @@ -17,4 +17,4 @@ namespace HeterogeneousCoreROCmTestKernelPlugins { hipCheck(hipGetLastError()); } -} // namespace HeterogeneousCoreROCmTestKernelPlugins +} // namespace HeterogeneousTestROCmKernelPlugins diff --git a/HeterogeneousTest/ROCmKernel/plugins/ROCmTestKernelAdditionModule.cc b/HeterogeneousTest/ROCmKernel/plugins/ROCmTestKernelAdditionModule.cc index c33e42e3c49b0..108e78f4c79f4 100644 --- a/HeterogeneousTest/ROCmKernel/plugins/ROCmTestKernelAdditionModule.cc +++ b/HeterogeneousTest/ROCmKernel/plugins/ROCmTestKernelAdditionModule.cc @@ -84,7 +84,7 @@ void ROCmTestKernelAdditionModule::analyze(edm::StreamID, edm::Event const& even hipCheck(hipMemset(out_d, 0, size_ * sizeof(float))); // launch the 1-dimensional kernel for vector addition - HeterogeneousCoreROCmTestKernelPlugins::wrapper_add_vectors_f(in1_d, in2_d, out_d, size_); + HeterogeneousTestROCmKernelPlugins::wrapper_add_vectors_f(in1_d, in2_d, out_d, size_); // copy the results from the device to the host hipCheck(hipMemcpy(out_h.data(), out_d, size_ * sizeof(float), hipMemcpyDeviceToHost)); From ceaac242e6b769b31e5c3ff23f01ff1fbf74757c Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Sun, 29 Jan 2023 23:20:30 +0100 Subject: [PATCH 5/5] Add a test for Alpaka libraries and build rules The package HeterogeneousTest/AlpakaDevice implements a header only library that defines Alpaka device-only functions, and a plugin and test that use them. The package HeterogeneousTest/AlpakaKernel implements a header only library that imports device functions from HeterogeneousTest/AlpakaDevice to define Alpaka kernels, and a plugin and test that use them. The package HeterogeneousTest/AlpakaWrapper implements a library that imports kernels from HeterogeneousTest/AlpakaKernel to define and export host-only wrappers around them, usable by non-Alpaka libraries, plugins and applications, and implements a plugin and test that use them. The package HeterogeneousTest/AlpakaOpaque implements a library that imports kernels from HeterogeneousTest/AlpakaKernel to define and export host-only wrappers around the whole Alpaka section, usable by libraries, plugins and applications that are not explicitly Alpaka-aware, and implements a plugin and test that use them. --- HeterogeneousTest/AlpakaDevice/BuildFile.xml | 2 + HeterogeneousTest/AlpakaDevice/README.md | 49 +++++++ .../interface/alpaka/DeviceAddition.h | 36 +++++ .../AlpakaDevice/plugins/BuildFile.xml | 11 ++ .../AlpakaTestDeviceAdditionAlgo.dev.cc | 32 +++++ .../alpaka/AlpakaTestDeviceAdditionAlgo.h | 18 +++ .../alpaka/AlpakaTestDeviceAdditionModule.cc | 124 ++++++++++++++++++ .../AlpakaDevice/test/BuildFile.xml | 9 ++ .../test/alpaka/testDeviceAddition.dev.cc | 102 ++++++++++++++ .../testAlpakaTestDeviceAdditionModule.py | 15 +++ HeterogeneousTest/AlpakaKernel/BuildFile.xml | 3 + HeterogeneousTest/AlpakaKernel/README.md | 53 ++++++++ .../interface/alpaka/DeviceAdditionKernel.h | 36 +++++ .../AlpakaKernel/plugins/BuildFile.xml | 11 ++ .../AlpakaTestKernelAdditionAlgo.dev.cc | 22 ++++ .../alpaka/AlpakaTestKernelAdditionAlgo.h | 18 +++ .../alpaka/AlpakaTestKernelAdditionModule.cc | 124 ++++++++++++++++++ .../AlpakaKernel/test/BuildFile.xml | 9 ++ .../alpaka/testDeviceAdditionKernel.dev.cc | 91 +++++++++++++ .../testAlpakaTestKernelAdditionModule.py | 15 +++ HeterogeneousTest/AlpakaOpaque/BuildFile.xml | 7 + HeterogeneousTest/AlpakaOpaque/README.md | 46 +++++++ .../interface/alpaka/DeviceAdditionOpaque.h | 16 +++ .../AlpakaOpaque/plugins/BuildFile.xml | 10 ++ .../alpaka/AlpakaTestOpaqueAdditionModule.cc | 94 +++++++++++++ .../src/alpaka/DeviceAdditionOpaque.cc | 85 ++++++++++++ .../AlpakaOpaque/test/BuildFile.xml | 10 ++ .../test/alpaka/testDeviceAdditionOpaque.cc | 54 ++++++++ .../test/testAlpakaTestAdditionModules.py | 31 +++++ .../testAlpakaTestOpaqueAdditionModule.py | 15 +++ HeterogeneousTest/AlpakaWrapper/BuildFile.xml | 7 + HeterogeneousTest/AlpakaWrapper/README.md | 48 +++++++ .../interface/alpaka/DeviceAdditionWrapper.h | 24 ++++ .../AlpakaWrapper/plugins/BuildFile.xml | 11 ++ .../alpaka/AlpakaTestWrapperAdditionModule.cc | 122 +++++++++++++++++ .../src/alpaka/DeviceAdditionWrapper.dev.cc | 30 +++++ .../AlpakaWrapper/test/BuildFile.xml | 9 ++ .../test/alpaka/testDeviceAdditionWrapper.cc | 85 ++++++++++++ .../testAlpakaTestWrapperAdditionModule.py | 15 +++ 39 files changed, 1499 insertions(+) create mode 100644 HeterogeneousTest/AlpakaDevice/BuildFile.xml create mode 100644 HeterogeneousTest/AlpakaDevice/README.md create mode 100644 HeterogeneousTest/AlpakaDevice/interface/alpaka/DeviceAddition.h create mode 100644 HeterogeneousTest/AlpakaDevice/plugins/BuildFile.xml create mode 100644 HeterogeneousTest/AlpakaDevice/plugins/alpaka/AlpakaTestDeviceAdditionAlgo.dev.cc create mode 100644 HeterogeneousTest/AlpakaDevice/plugins/alpaka/AlpakaTestDeviceAdditionAlgo.h create mode 100644 HeterogeneousTest/AlpakaDevice/plugins/alpaka/AlpakaTestDeviceAdditionModule.cc create mode 100644 HeterogeneousTest/AlpakaDevice/test/BuildFile.xml create mode 100644 HeterogeneousTest/AlpakaDevice/test/alpaka/testDeviceAddition.dev.cc create mode 100644 HeterogeneousTest/AlpakaDevice/test/testAlpakaTestDeviceAdditionModule.py create mode 100644 HeterogeneousTest/AlpakaKernel/BuildFile.xml create mode 100644 HeterogeneousTest/AlpakaKernel/README.md create mode 100644 HeterogeneousTest/AlpakaKernel/interface/alpaka/DeviceAdditionKernel.h create mode 100644 HeterogeneousTest/AlpakaKernel/plugins/BuildFile.xml create mode 100644 HeterogeneousTest/AlpakaKernel/plugins/alpaka/AlpakaTestKernelAdditionAlgo.dev.cc create mode 100644 HeterogeneousTest/AlpakaKernel/plugins/alpaka/AlpakaTestKernelAdditionAlgo.h create mode 100644 HeterogeneousTest/AlpakaKernel/plugins/alpaka/AlpakaTestKernelAdditionModule.cc create mode 100644 HeterogeneousTest/AlpakaKernel/test/BuildFile.xml create mode 100644 HeterogeneousTest/AlpakaKernel/test/alpaka/testDeviceAdditionKernel.dev.cc create mode 100644 HeterogeneousTest/AlpakaKernel/test/testAlpakaTestKernelAdditionModule.py create mode 100644 HeterogeneousTest/AlpakaOpaque/BuildFile.xml create mode 100644 HeterogeneousTest/AlpakaOpaque/README.md create mode 100644 HeterogeneousTest/AlpakaOpaque/interface/alpaka/DeviceAdditionOpaque.h create mode 100644 HeterogeneousTest/AlpakaOpaque/plugins/BuildFile.xml create mode 100644 HeterogeneousTest/AlpakaOpaque/plugins/alpaka/AlpakaTestOpaqueAdditionModule.cc create mode 100644 HeterogeneousTest/AlpakaOpaque/src/alpaka/DeviceAdditionOpaque.cc create mode 100644 HeterogeneousTest/AlpakaOpaque/test/BuildFile.xml create mode 100644 HeterogeneousTest/AlpakaOpaque/test/alpaka/testDeviceAdditionOpaque.cc create mode 100644 HeterogeneousTest/AlpakaOpaque/test/testAlpakaTestAdditionModules.py create mode 100644 HeterogeneousTest/AlpakaOpaque/test/testAlpakaTestOpaqueAdditionModule.py create mode 100644 HeterogeneousTest/AlpakaWrapper/BuildFile.xml create mode 100644 HeterogeneousTest/AlpakaWrapper/README.md create mode 100644 HeterogeneousTest/AlpakaWrapper/interface/alpaka/DeviceAdditionWrapper.h create mode 100644 HeterogeneousTest/AlpakaWrapper/plugins/BuildFile.xml create mode 100644 HeterogeneousTest/AlpakaWrapper/plugins/alpaka/AlpakaTestWrapperAdditionModule.cc create mode 100644 HeterogeneousTest/AlpakaWrapper/src/alpaka/DeviceAdditionWrapper.dev.cc create mode 100644 HeterogeneousTest/AlpakaWrapper/test/BuildFile.xml create mode 100644 HeterogeneousTest/AlpakaWrapper/test/alpaka/testDeviceAdditionWrapper.cc create mode 100644 HeterogeneousTest/AlpakaWrapper/test/testAlpakaTestWrapperAdditionModule.py diff --git a/HeterogeneousTest/AlpakaDevice/BuildFile.xml b/HeterogeneousTest/AlpakaDevice/BuildFile.xml new file mode 100644 index 0000000000000..33da29f0df749 --- /dev/null +++ b/HeterogeneousTest/AlpakaDevice/BuildFile.xml @@ -0,0 +1,2 @@ + + diff --git a/HeterogeneousTest/AlpakaDevice/README.md b/HeterogeneousTest/AlpakaDevice/README.md new file mode 100644 index 0000000000000..4815e1a1f0200 --- /dev/null +++ b/HeterogeneousTest/AlpakaDevice/README.md @@ -0,0 +1,49 @@ +# Introduction + +The packages `HeterogeneousTest/AlpakaDevice`, `HeterogeneousTest/AlpakaKernel`, +`HeterogeneousTest/AlpakaWrapper` and `HeterogeneousTest/AlpakaOpaque` implement a set of libraries, +plugins and tests to exercise the build rules for Alpaka. +In particular, these tests show what is supported and what are the limitations implementing +Alpaka-based libraries, and using them from multiple plugins. + + +# `HeterogeneousTest/AlpakaDevice` + +The package `HeterogeneousTest/AlpakaDevice` implements a library that defines and exports Alpaka +device-side functions: +```c++ +namespace cms::alpakatest { + + template + ALPAKA_FN_ACC void add_vectors_f(TAcc const& acc, ...); + + template + ALPAKA_FN_ACC void add_vectors_d(TAcc const& acc, ...); + +} // namespace cms::alpakatest +``` + +The `plugins` directory implements the `AlpakaTestDeviceAdditionModule` `EDAnalyzer` that launches +an Alpaka kernel using the functions defined in ths library. As a byproduct this plugin also shows +how to split an `EDAnalyzer` or other framework plugin into a host-only part (in a `.cc` file) and +a device part (in a `.dev.cc` file). + +The `test` directory implements the `testAlpakaDeviceAddition` binary that launches a Alpaka kernel +using these functions. +It also contains the `testAlpakaTestDeviceAdditionModule.py` python configuration to exercise the +`AlpakaTestDeviceAdditionModule` plugin. + + +# Other packages + +For various ways in which this library and plugin can be tested, see also the other +`HeterogeneousTest/Alpaka...` packages: + - [`HeterogeneousTest/AlpakaKernel/README.md`](../../HeterogeneousTest/AlpakaKernel/README.md) + - [`HeterogeneousTest/AlpakaWrapper/README.md`](../../HeterogeneousTest/AlpakaWrapper/README.md) + - [`HeterogeneousTest/AlpakaOpaque/README.md`](../../HeterogeneousTest/AlpakaOpaque/README.md) + + +# Combining plugins + +`HeterogeneousTest/AlpakaOpaque/test` contains the `testAlpakaTestAdditionModules.py` python +configuration that exercise all four plugins in a single application. diff --git a/HeterogeneousTest/AlpakaDevice/interface/alpaka/DeviceAddition.h b/HeterogeneousTest/AlpakaDevice/interface/alpaka/DeviceAddition.h new file mode 100644 index 0000000000000..9a38952b2c2b1 --- /dev/null +++ b/HeterogeneousTest/AlpakaDevice/interface/alpaka/DeviceAddition.h @@ -0,0 +1,36 @@ +#ifndef HeterogeneousTest_AlpakaDevice_interface_alpaka_DeviceAddition_h +#define HeterogeneousTest_AlpakaDevice_interface_alpaka_DeviceAddition_h + +#include + +#include + +#include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h" + +namespace cms::alpakatest { + + template + ALPAKA_FN_ACC void add_vectors_f(TAcc const& acc, + float const* __restrict__ in1, + float const* __restrict__ in2, + float* __restrict__ out, + uint32_t size) { + for (auto i : cms::alpakatools::uniform_elements(acc, size)) { + out[i] = in1[i] + in2[i]; + } + } + + template + ALPAKA_FN_ACC void add_vectors_d(TAcc const& acc, + double const* __restrict__ in1, + double const* __restrict__ in2, + double* __restrict__ out, + uint32_t size) { + for (auto i : cms::alpakatools::uniform_elements(acc, size)) { + out[i] = in1[i] + in2[i]; + } + } + +} // namespace cms::alpakatest + +#endif // HeterogeneousTest_AlpakaDevice_interface_alpaka_DeviceAddition_h diff --git a/HeterogeneousTest/AlpakaDevice/plugins/BuildFile.xml b/HeterogeneousTest/AlpakaDevice/plugins/BuildFile.xml new file mode 100644 index 0000000000000..7601109f77e70 --- /dev/null +++ b/HeterogeneousTest/AlpakaDevice/plugins/BuildFile.xml @@ -0,0 +1,11 @@ + + + + + + + + + + + diff --git a/HeterogeneousTest/AlpakaDevice/plugins/alpaka/AlpakaTestDeviceAdditionAlgo.dev.cc b/HeterogeneousTest/AlpakaDevice/plugins/alpaka/AlpakaTestDeviceAdditionAlgo.dev.cc new file mode 100644 index 0000000000000..e8e899e354ac1 --- /dev/null +++ b/HeterogeneousTest/AlpakaDevice/plugins/alpaka/AlpakaTestDeviceAdditionAlgo.dev.cc @@ -0,0 +1,32 @@ +#include + +#include + +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h" +#include "HeterogeneousTest/AlpakaDevice/interface/alpaka/DeviceAddition.h" + +#include "AlpakaTestDeviceAdditionAlgo.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE::HeterogeneousTestAlpakaDevicePlugins { + + struct KernelAddVectorsF { + template + ALPAKA_FN_ACC void operator()(TAcc const& acc, + const float* __restrict__ in1, + const float* __restrict__ in2, + float* __restrict__ out, + uint32_t size) const { + cms::alpakatest::add_vectors_f(acc, in1, in2, out, size); + } + }; + + void wrapper_add_vectors_f(Queue& queue, + const float* __restrict__ in1, + const float* __restrict__ in2, + float* __restrict__ out, + uint32_t size) { + alpaka::exec(queue, cms::alpakatools::make_workdiv(32, 32), KernelAddVectorsF{}, in1, in2, out, size); + } + +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::HeterogeneousTestAlpakaDevicePlugins diff --git a/HeterogeneousTest/AlpakaDevice/plugins/alpaka/AlpakaTestDeviceAdditionAlgo.h b/HeterogeneousTest/AlpakaDevice/plugins/alpaka/AlpakaTestDeviceAdditionAlgo.h new file mode 100644 index 0000000000000..3bac24f61d5f8 --- /dev/null +++ b/HeterogeneousTest/AlpakaDevice/plugins/alpaka/AlpakaTestDeviceAdditionAlgo.h @@ -0,0 +1,18 @@ +#ifndef HeterogeneousTest_AlpakaDevice_plugins_alpaka_AlpakaTestDeviceAdditionAlgo_h +#define HeterogeneousTest_AlpakaDevice_plugins_alpaka_AlpakaTestDeviceAdditionAlgo_h + +#include + +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE::HeterogeneousTestAlpakaDevicePlugins { + + void wrapper_add_vectors_f(Queue& queue, + const float* __restrict__ in1, + const float* __restrict__ in2, + float* __restrict__ out, + uint32_t size); + +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::HeterogeneousTestAlpakaDevicePlugins + +#endif // HeterogeneousTest_AlpakaDevice_plugins_alpaka_AlpakaTestDeviceAdditionAlgo_h diff --git a/HeterogeneousTest/AlpakaDevice/plugins/alpaka/AlpakaTestDeviceAdditionModule.cc b/HeterogeneousTest/AlpakaDevice/plugins/alpaka/AlpakaTestDeviceAdditionModule.cc new file mode 100644 index 0000000000000..3ce3b451ef851 --- /dev/null +++ b/HeterogeneousTest/AlpakaDevice/plugins/alpaka/AlpakaTestDeviceAdditionModule.cc @@ -0,0 +1,124 @@ +#include +#include +#include +#include + +#include + +#include "FWCore/Framework/interface/Event.h" +#include "FWCore/Framework/interface/Frameworkfwd.h" +#include "FWCore/Framework/interface/global/EDAnalyzer.h" +#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" +#include "FWCore/ServiceRegistry/interface/Service.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "HeterogeneousCore/AlpakaInterface/interface/memory.h" +#include "HeterogeneousCore/AlpakaServices/interface/alpaka/AlpakaService.h" + +#include "AlpakaTestDeviceAdditionAlgo.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + + class AlpakaTestDeviceAdditionModule : public edm::global::EDAnalyzer<> { + public: + explicit AlpakaTestDeviceAdditionModule(edm::ParameterSet const& config); + ~AlpakaTestDeviceAdditionModule() override = default; + + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); + + void analyze(edm::StreamID, edm::Event const& event, edm::EventSetup const& setup) const override; + + private: + const uint32_t size_; + }; + + AlpakaTestDeviceAdditionModule::AlpakaTestDeviceAdditionModule(edm::ParameterSet const& config) + : size_(config.getParameter("size")) {} + + void AlpakaTestDeviceAdditionModule::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + desc.add("size", 1024 * 1024); + + // ignore the alpaka = cms.untracked.PSet(...) injected by the framework + edm::ParameterSetDescription alpaka; + alpaka.setAllowAnything(); + desc.addUntracked("alpaka", alpaka); + + descriptions.addWithDefaultLabel(desc); + } + + void AlpakaTestDeviceAdditionModule::analyze(edm::StreamID, + edm::Event const& event, + edm::EventSetup const& setup) const { + // require a valid Alpaka backend for running + edm::Service service; + if (not service or not service->enabled()) { + std::cout << "The " << ALPAKA_TYPE_ALIAS_NAME(AlpakaService) + << " is not available or disabled, the test will be skipped.\n"; + return; + } + + // random number generator with a gaussian distribution + std::random_device rd{}; + std::default_random_engine rand{rd()}; + std::normal_distribution dist{0., 1.}; + + // tolerance + constexpr float epsilon = 0.000001; + + // allocate input and output host buffers + std::vector in1_h(size_); + std::vector in2_h(size_); + std::vector out_h(size_); + + // fill the input buffers with random data, and the output buffer with zeros + for (uint32_t i = 0; i < size_; ++i) { + in1_h[i] = dist(rand); + in2_h[i] = dist(rand); + out_h[i] = 0.; + } + + // run the test on all available devices + for (auto const& device : cms::alpakatools::devices()) { + Queue queue{device}; + + // allocate input and output buffers on the device + auto in1_d = cms::alpakatools::make_device_buffer(queue, size_); + auto in2_d = cms::alpakatools::make_device_buffer(queue, size_); + auto out_d = cms::alpakatools::make_device_buffer(queue, size_); + + // copy the input data to the device + // FIXME: pass the explicit size of type uint32_t to avoid compilation error + // The destination view and the extent are required to have compatible index types! + alpaka::memcpy(queue, in1_d, in1_h, size_); + alpaka::memcpy(queue, in2_d, in2_h, size_); + + // fill the output buffer with zeros + alpaka::memset(queue, out_d, 0); + + // launch the 1-dimensional kernel for vector addition + HeterogeneousTestAlpakaDevicePlugins::wrapper_add_vectors_f( + queue, in1_d.data(), in2_d.data(), out_d.data(), size_); + + // copy the results from the device to the host + alpaka::memcpy(queue, out_h, out_d); + + // wait for all the operations to complete + alpaka::wait(queue); + + // check the results + for (uint32_t i = 0; i < size_; ++i) { + float sum = in1_h[i] + in2_h[i]; + assert(out_h[i] < sum + epsilon); + assert(out_h[i] > sum - epsilon); + } + } + + std::cout << "All tests passed.\n"; + } + +} // namespace ALPAKA_ACCELERATOR_NAMESPACE + +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/MakerMacros.h" +DEFINE_FWK_ALPAKA_MODULE(AlpakaTestDeviceAdditionModule); diff --git a/HeterogeneousTest/AlpakaDevice/test/BuildFile.xml b/HeterogeneousTest/AlpakaDevice/test/BuildFile.xml new file mode 100644 index 0000000000000..b8b03a57abc91 --- /dev/null +++ b/HeterogeneousTest/AlpakaDevice/test/BuildFile.xml @@ -0,0 +1,9 @@ + + + + + + + + + diff --git a/HeterogeneousTest/AlpakaDevice/test/alpaka/testDeviceAddition.dev.cc b/HeterogeneousTest/AlpakaDevice/test/alpaka/testDeviceAddition.dev.cc new file mode 100644 index 0000000000000..b73cd5b74279c --- /dev/null +++ b/HeterogeneousTest/AlpakaDevice/test/alpaka/testDeviceAddition.dev.cc @@ -0,0 +1,102 @@ +#include +#include +#include + +#define CATCH_CONFIG_MAIN +#include + +#include + +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "HeterogeneousCore/AlpakaInterface/interface/devices.h" +#include "HeterogeneousCore/AlpakaInterface/interface/memory.h" +#include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h" +#include "HeterogeneousTest/AlpakaDevice/interface/alpaka/DeviceAddition.h" + +using namespace ALPAKA_ACCELERATOR_NAMESPACE; + +struct KernelAddVectorsF { + template + ALPAKA_FN_ACC void operator()(TAcc const& acc, + const float* __restrict__ in1, + const float* __restrict__ in2, + float* __restrict__ out, + uint32_t size) const { + cms::alpakatest::add_vectors_f(acc, in1, in2, out, size); + } +}; + +TEST_CASE("HeterogeneousTest/AlpakaDevice test", "[alpakaTestDeviceAddition]") { + auto const& devices = cms::alpakatools::devices(); + if (devices.empty()) { + FAIL("No devices available for the " EDM_STRINGIZE(ALPAKA_ACCELERATOR_NAMESPACE) " backend, " + "the test will be skipped."); + } + + // random number generator with a gaussian distribution + std::random_device rd{}; + std::default_random_engine rand{rd()}; + std::normal_distribution dist{0., 1.}; + + // tolerance + constexpr float epsilon = 0.000001; + + // buffer size + constexpr uint32_t size = 1024 * 1024; + + // allocate input and output host buffers + std::vector in1_h(size); + std::vector in2_h(size); + std::vector out_h(size); + + // fill the input buffers with random data, and the output buffer with zeros + for (uint32_t i = 0; i < size; ++i) { + in1_h[i] = dist(rand); + in2_h[i] = dist(rand); + out_h[i] = 0.; + } + + // run the test on all available devices + for (auto const& device : cms::alpakatools::devices()) { + SECTION("Test add_vectors_f on " EDM_STRINGIZE(ALPAKA_ACCELERATOR_NAMESPACE) " backend") { + REQUIRE_NOTHROW([&]() { + Queue queue{device}; + + // allocate input and output buffers on the device + auto in1_d = cms::alpakatools::make_device_buffer(queue, size); + auto in2_d = cms::alpakatools::make_device_buffer(queue, size); + auto out_d = cms::alpakatools::make_device_buffer(queue, size); + + // copy the input data to the device + // FIXME: pass the explicit size of type uint32_t to avoid compilation error + // The destination view and the extent are required to have compatible index types! + alpaka::memcpy(queue, in1_d, in1_h, size); + alpaka::memcpy(queue, in2_d, in2_h, size); + + // fill the output buffer with zeros + alpaka::memset(queue, out_d, 0); + + // launch the 1-dimensional kernel for vector addition + alpaka::exec(queue, + cms::alpakatools::make_workdiv(32, 32), + KernelAddVectorsF{}, + in1_d.data(), + in2_d.data(), + out_d.data(), + size); + + // copy the results from the device to the host + alpaka::memcpy(queue, out_h, out_d, size); + + // wait for all the operations to complete + alpaka::wait(queue); + }()); + + // check the results + for (uint32_t i = 0; i < size; ++i) { + float sum = in1_h[i] + in2_h[i]; + CHECK_THAT(out_h[i], Catch::Matchers::WithinAbs(sum, epsilon)); + } + } + } +} diff --git a/HeterogeneousTest/AlpakaDevice/test/testAlpakaTestDeviceAdditionModule.py b/HeterogeneousTest/AlpakaDevice/test/testAlpakaTestDeviceAdditionModule.py new file mode 100644 index 0000000000000..79c7b0e9c51db --- /dev/null +++ b/HeterogeneousTest/AlpakaDevice/test/testAlpakaTestDeviceAdditionModule.py @@ -0,0 +1,15 @@ +import FWCore.ParameterSet.Config as cms + +process = cms.Process('TestAlpakaTestDeviceAdditionModule') +process.load('Configuration.StandardSequences.Accelerators_cff') +process.load('HeterogeneousCore.AlpakaCore.ProcessAcceleratorAlpaka_cfi') + +process.source = cms.Source('EmptySource') + +process.alpakaTestDeviceAdditionModule = cms.EDAnalyzer('AlpakaTestDeviceAdditionModule@alpaka', + size = cms.uint32( 1024*1024 ) +) + +process.path = cms.Path(process.alpakaTestDeviceAdditionModule) + +process.maxEvents.input = 1 diff --git a/HeterogeneousTest/AlpakaKernel/BuildFile.xml b/HeterogeneousTest/AlpakaKernel/BuildFile.xml new file mode 100644 index 0000000000000..dbd2e61ca61ea --- /dev/null +++ b/HeterogeneousTest/AlpakaKernel/BuildFile.xml @@ -0,0 +1,3 @@ + + + diff --git a/HeterogeneousTest/AlpakaKernel/README.md b/HeterogeneousTest/AlpakaKernel/README.md new file mode 100644 index 0000000000000..fd87ed2cf0d01 --- /dev/null +++ b/HeterogeneousTest/AlpakaKernel/README.md @@ -0,0 +1,53 @@ +# Introduction + +The packages `HeterogeneousTest/AlpakaDevice`, `HeterogeneousTest/AlpakaKernel`, +`HeterogeneousTest/AlpakaWrapper` and `HeterogeneousTest/AlpakaOpaque` implement a set of libraries, +plugins and tests to exercise the build rules for Alpaka. +In particular, these tests show what is supported and what are the limitations implementing +Alpaka-based libraries, and using them from multiple plugins. + + +# `HeterogeneousTest/AlpakaKernel` + +The package `HeterogeneousTest/AlpakaKernel` implements a library that defines and exports Alpaka +kernels that call the device functions defined in the `HeterogeneousTest/AlpakaDevice` library: +```c++ +namespace cms::alpakatest { + + struct KernelAddVectorsF { + template + ALPAKA_FN_ACC void operator()(TAcc const& acc, ...) const; + }; + + struct KernelAddVectorsD { + template + ALPAKA_FN_ACC void operator()(TAcc const& acc, ...) const; + }; + +} // namespace cms::alpakatest +``` + +The `plugins` directory implements the `AlpakaTestKernelAdditionModule` `EDAnalyzer` that launches +the Alpaka kernels defined in this library. As a byproduct this plugin also shows how to split an +`EDAnalyzer` or other framework plugin into a host-only part (in a `.cc` file) and a device part (in +a `.dev.cc` file). + +The `test` directory implements the `testAlpakaKernelAddition` test binary that launches the Alpaka +kernel defined in this library. +It also contains the `testAlpakaTestKernelAdditionModule.py` python configuration to exercise the +`AlpakaTestKernelAdditionModule` module. + + +# Other packages + +For various ways in which this library and plugin can be tested, see also the other +`HeterogeneousTest/Alpaka...` packages: + - [`HeterogeneousTest/AlpakaDevice/README.md`](../../HeterogeneousTest/AlpakaDevice/README.md) + - [`HeterogeneousTest/AlpakaWrapper/README.md`](../../HeterogeneousTest/AlpakaWrapper/README.md) + - [`HeterogeneousTest/AlpakaOpaque/README.md`](../../HeterogeneousTest/AlpakaOpaque/README.md) + + +# Combining plugins + +`HeterogeneousTest/AlpakaOpaque/test` contains the `testAlpakaTestAdditionModules.py` python +configuration that exercise all four plugins in a single application. diff --git a/HeterogeneousTest/AlpakaKernel/interface/alpaka/DeviceAdditionKernel.h b/HeterogeneousTest/AlpakaKernel/interface/alpaka/DeviceAdditionKernel.h new file mode 100644 index 0000000000000..43d99270e32b8 --- /dev/null +++ b/HeterogeneousTest/AlpakaKernel/interface/alpaka/DeviceAdditionKernel.h @@ -0,0 +1,36 @@ +#ifndef HeterogeneousTest_AlpakaKernel_interface_alpaka_DeviceAdditionKernel_h +#define HeterogeneousTest_AlpakaKernel_interface_alpaka_DeviceAdditionKernel_h + +#include + +#include + +#include "HeterogeneousTest/AlpakaDevice/interface/alpaka/DeviceAddition.h" + +namespace cms::alpakatest { + + struct KernelAddVectorsF { + template + ALPAKA_FN_ACC void operator()(TAcc const& acc, + const float* __restrict__ in1, + const float* __restrict__ in2, + float* __restrict__ out, + uint32_t size) const { + add_vectors_f(acc, in1, in2, out, size); + } + }; + + struct KernelAddVectorsD { + template + ALPAKA_FN_ACC void operator()(TAcc const& acc, + const double* __restrict__ in1, + const double* __restrict__ in2, + double* __restrict__ out, + uint32_t size) const { + add_vectors_d(acc, in1, in2, out, size); + } + }; + +} // namespace cms::alpakatest + +#endif // HeterogeneousTest_AlpakaKernel_interface_alpaka_DeviceAdditionKernel_h diff --git a/HeterogeneousTest/AlpakaKernel/plugins/BuildFile.xml b/HeterogeneousTest/AlpakaKernel/plugins/BuildFile.xml new file mode 100644 index 0000000000000..9afe990758c74 --- /dev/null +++ b/HeterogeneousTest/AlpakaKernel/plugins/BuildFile.xml @@ -0,0 +1,11 @@ + + + + + + + + + + + diff --git a/HeterogeneousTest/AlpakaKernel/plugins/alpaka/AlpakaTestKernelAdditionAlgo.dev.cc b/HeterogeneousTest/AlpakaKernel/plugins/alpaka/AlpakaTestKernelAdditionAlgo.dev.cc new file mode 100644 index 0000000000000..0cf8caa3769c9 --- /dev/null +++ b/HeterogeneousTest/AlpakaKernel/plugins/alpaka/AlpakaTestKernelAdditionAlgo.dev.cc @@ -0,0 +1,22 @@ +#include + +#include + +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h" +#include "HeterogeneousTest/AlpakaKernel/interface/alpaka/DeviceAdditionKernel.h" + +#include "AlpakaTestKernelAdditionAlgo.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE::HeterogeneousTestAlpakaKernelPlugins { + + void wrapper_add_vectors_f(Queue& queue, + const float* __restrict__ in1, + const float* __restrict__ in2, + float* __restrict__ out, + uint32_t size) { + alpaka::exec( + queue, cms::alpakatools::make_workdiv(32, 32), cms::alpakatest::KernelAddVectorsF{}, in1, in2, out, size); + } + +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::HeterogeneousTestAlpakaKernelPlugins diff --git a/HeterogeneousTest/AlpakaKernel/plugins/alpaka/AlpakaTestKernelAdditionAlgo.h b/HeterogeneousTest/AlpakaKernel/plugins/alpaka/AlpakaTestKernelAdditionAlgo.h new file mode 100644 index 0000000000000..268c2117144f3 --- /dev/null +++ b/HeterogeneousTest/AlpakaKernel/plugins/alpaka/AlpakaTestKernelAdditionAlgo.h @@ -0,0 +1,18 @@ +#ifndef HeterogeneousTest_AlpakaKernel_plugins_alpaka_AlpakaTestKernelAdditionAlgo_h +#define HeterogeneousTest_AlpakaKernel_plugins_alpaka_AlpakaTestKernelAdditionAlgo_h + +#include + +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE::HeterogeneousTestAlpakaKernelPlugins { + + void wrapper_add_vectors_f(Queue& queue, + const float* __restrict__ in1, + const float* __restrict__ in2, + float* __restrict__ out, + uint32_t size); + +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::HeterogeneousTestAlpakaKernelPlugins + +#endif // HeterogeneousTest_AlpakaKernel_plugins_alpaka_AlpakaTestKernelAdditionAlgo_h diff --git a/HeterogeneousTest/AlpakaKernel/plugins/alpaka/AlpakaTestKernelAdditionModule.cc b/HeterogeneousTest/AlpakaKernel/plugins/alpaka/AlpakaTestKernelAdditionModule.cc new file mode 100644 index 0000000000000..a58931c389985 --- /dev/null +++ b/HeterogeneousTest/AlpakaKernel/plugins/alpaka/AlpakaTestKernelAdditionModule.cc @@ -0,0 +1,124 @@ +#include +#include +#include +#include + +#include + +#include "FWCore/Framework/interface/Event.h" +#include "FWCore/Framework/interface/Frameworkfwd.h" +#include "FWCore/Framework/interface/global/EDAnalyzer.h" +#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" +#include "FWCore/ServiceRegistry/interface/Service.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "HeterogeneousCore/AlpakaInterface/interface/memory.h" +#include "HeterogeneousCore/AlpakaServices/interface/alpaka/AlpakaService.h" + +#include "AlpakaTestKernelAdditionAlgo.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + + class AlpakaTestKernelAdditionModule : public edm::global::EDAnalyzer<> { + public: + explicit AlpakaTestKernelAdditionModule(edm::ParameterSet const& config); + ~AlpakaTestKernelAdditionModule() override = default; + + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); + + void analyze(edm::StreamID, edm::Event const& event, edm::EventSetup const& setup) const override; + + private: + const uint32_t size_; + }; + + AlpakaTestKernelAdditionModule::AlpakaTestKernelAdditionModule(edm::ParameterSet const& config) + : size_(config.getParameter("size")) {} + + void AlpakaTestKernelAdditionModule::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + desc.add("size", 1024 * 1024); + + // ignore the alpaka = cms.untracked.PSet(...) injected by the framework + edm::ParameterSetDescription alpaka; + alpaka.setAllowAnything(); + desc.addUntracked("alpaka", alpaka); + + descriptions.addWithDefaultLabel(desc); + } + + void AlpakaTestKernelAdditionModule::analyze(edm::StreamID, + edm::Event const& event, + edm::EventSetup const& setup) const { + // require a valid Alpaka backend for running + edm::Service service; + if (not service or not service->enabled()) { + std::cout << "The " << ALPAKA_TYPE_ALIAS_NAME(AlpakaService) + << " is not available or disabled, the test will be skipped.\n"; + return; + } + + // random number generator with a gaussian distribution + std::random_device rd{}; + std::default_random_engine rand{rd()}; + std::normal_distribution dist{0., 1.}; + + // tolerance + constexpr float epsilon = 0.000001; + + // allocate input and output host buffers + std::vector in1_h(size_); + std::vector in2_h(size_); + std::vector out_h(size_); + + // fill the input buffers with random data, and the output buffer with zeros + for (uint32_t i = 0; i < size_; ++i) { + in1_h[i] = dist(rand); + in2_h[i] = dist(rand); + out_h[i] = 0.; + } + + // run the test on all available devices + for (auto const& device : cms::alpakatools::devices()) { + Queue queue{device}; + + // allocate input and output buffers on the device + auto in1_d = cms::alpakatools::make_device_buffer(queue, size_); + auto in2_d = cms::alpakatools::make_device_buffer(queue, size_); + auto out_d = cms::alpakatools::make_device_buffer(queue, size_); + + // copy the input data to the device + // FIXME: pass the explicit size of type uint32_t to avoid compilation error + // The destination view and the extent are required to have compatible index types! + alpaka::memcpy(queue, in1_d, in1_h, size_); + alpaka::memcpy(queue, in2_d, in2_h, size_); + + // fill the output buffer with zeros + alpaka::memset(queue, out_d, 0); + + // launch the 1-dimensional kernel for vector addition + HeterogeneousTestAlpakaKernelPlugins::wrapper_add_vectors_f( + queue, in1_d.data(), in2_d.data(), out_d.data(), size_); + + // copy the results from the device to the host + alpaka::memcpy(queue, out_h, out_d); + + // wait for all the operations to complete + alpaka::wait(queue); + + // check the results + for (uint32_t i = 0; i < size_; ++i) { + float sum = in1_h[i] + in2_h[i]; + assert(out_h[i] < sum + epsilon); + assert(out_h[i] > sum - epsilon); + } + } + + std::cout << "All tests passed.\n"; + } + +} // namespace ALPAKA_ACCELERATOR_NAMESPACE + +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/MakerMacros.h" +DEFINE_FWK_ALPAKA_MODULE(AlpakaTestKernelAdditionModule); diff --git a/HeterogeneousTest/AlpakaKernel/test/BuildFile.xml b/HeterogeneousTest/AlpakaKernel/test/BuildFile.xml new file mode 100644 index 0000000000000..75c73a6122b0b --- /dev/null +++ b/HeterogeneousTest/AlpakaKernel/test/BuildFile.xml @@ -0,0 +1,9 @@ + + + + + + + + + diff --git a/HeterogeneousTest/AlpakaKernel/test/alpaka/testDeviceAdditionKernel.dev.cc b/HeterogeneousTest/AlpakaKernel/test/alpaka/testDeviceAdditionKernel.dev.cc new file mode 100644 index 0000000000000..14b4f7f520640 --- /dev/null +++ b/HeterogeneousTest/AlpakaKernel/test/alpaka/testDeviceAdditionKernel.dev.cc @@ -0,0 +1,91 @@ +#include +#include +#include + +#define CATCH_CONFIG_MAIN +#include + +#include + +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "HeterogeneousCore/AlpakaInterface/interface/devices.h" +#include "HeterogeneousCore/AlpakaInterface/interface/memory.h" +#include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h" +#include "HeterogeneousTest/AlpakaKernel/interface/alpaka/DeviceAdditionKernel.h" + +using namespace ALPAKA_ACCELERATOR_NAMESPACE; + +TEST_CASE("HeterogeneousTest/AlpakaKernel test", "[alpakaTestDeviceAdditionKernel]") { + auto const& devices = cms::alpakatools::devices(); + if (devices.empty()) { + FAIL("No devices available for the " EDM_STRINGIZE(ALPAKA_ACCELERATOR_NAMESPACE) " backend, " + "the test will be skipped."); + } + + // random number generator with a gaussian distribution + std::random_device rd{}; + std::default_random_engine rand{rd()}; + std::normal_distribution dist{0., 1.}; + + // tolerance + constexpr float epsilon = 0.000001; + + // buffer size + constexpr uint32_t size = 1024 * 1024; + + // allocate input and output host buffers + std::vector in1_h(size); + std::vector in2_h(size); + std::vector out_h(size); + + // fill the input buffers with random data, and the output buffer with zeros + for (uint32_t i = 0; i < size; ++i) { + in1_h[i] = dist(rand); + in2_h[i] = dist(rand); + out_h[i] = 0.; + } + + // run the test on all available devices + for (auto const& device : cms::alpakatools::devices()) { + SECTION("Test add_vectors_f on " EDM_STRINGIZE(ALPAKA_ACCELERATOR_NAMESPACE) " backend") { + REQUIRE_NOTHROW([&]() { + Queue queue{device}; + + // allocate input and output buffers on the device + auto in1_d = cms::alpakatools::make_device_buffer(queue, size); + auto in2_d = cms::alpakatools::make_device_buffer(queue, size); + auto out_d = cms::alpakatools::make_device_buffer(queue, size); + + // copy the input data to the device + // FIXME: pass the explicit size of type uint32_t to avoid compilation error + // The destination view and the extent are required to have compatible index types! + alpaka::memcpy(queue, in1_d, in1_h, size); + alpaka::memcpy(queue, in2_d, in2_h, size); + + // fill the output buffer with zeros + alpaka::memset(queue, out_d, 0); + + // launch the 1-dimensional kernel for vector addition + alpaka::exec(queue, + cms::alpakatools::make_workdiv(32, 32), + cms::alpakatest::KernelAddVectorsF{}, + in1_d.data(), + in2_d.data(), + out_d.data(), + size); + + // copy the results from the device to the host + alpaka::memcpy(queue, out_h, out_d, size); + + // wait for all the operations to complete + alpaka::wait(queue); + }()); + + // check the results + for (uint32_t i = 0; i < size; ++i) { + float sum = in1_h[i] + in2_h[i]; + CHECK_THAT(out_h[i], Catch::Matchers::WithinAbs(sum, epsilon)); + } + } + } +} diff --git a/HeterogeneousTest/AlpakaKernel/test/testAlpakaTestKernelAdditionModule.py b/HeterogeneousTest/AlpakaKernel/test/testAlpakaTestKernelAdditionModule.py new file mode 100644 index 0000000000000..bc7f9fae436ed --- /dev/null +++ b/HeterogeneousTest/AlpakaKernel/test/testAlpakaTestKernelAdditionModule.py @@ -0,0 +1,15 @@ +import FWCore.ParameterSet.Config as cms + +process = cms.Process('TestAlpakaTestKernelAdditionModule') +process.load('Configuration.StandardSequences.Accelerators_cff') +process.load('HeterogeneousCore.AlpakaCore.ProcessAcceleratorAlpaka_cfi') + +process.source = cms.Source('EmptySource') + +process.alpakaTestKernelAdditionModule = cms.EDAnalyzer('AlpakaTestKernelAdditionModule@alpaka', + size = cms.uint32( 1024*1024 ) +) + +process.path = cms.Path(process.alpakaTestKernelAdditionModule) + +process.maxEvents.input = 1 diff --git a/HeterogeneousTest/AlpakaOpaque/BuildFile.xml b/HeterogeneousTest/AlpakaOpaque/BuildFile.xml new file mode 100644 index 0000000000000..03d0d171f2be2 --- /dev/null +++ b/HeterogeneousTest/AlpakaOpaque/BuildFile.xml @@ -0,0 +1,7 @@ + + + + + + + diff --git a/HeterogeneousTest/AlpakaOpaque/README.md b/HeterogeneousTest/AlpakaOpaque/README.md new file mode 100644 index 0000000000000..3bd31fbd2bbd6 --- /dev/null +++ b/HeterogeneousTest/AlpakaOpaque/README.md @@ -0,0 +1,46 @@ +# Introduction + +The packages `HeterogeneousTest/AlpakaDevice`, `HeterogeneousTest/AlpakaKernel`, +`HeterogeneousTest/AlpakaWrapper` and `HeterogeneousTest/AlpakaOpaque` implement a set of libraries, +plugins and tests to exercise the build rules for Alpaka. +In particular, these tests show what is supported and what are the limitations implementing +Alpaka-based libraries, and using them from multiple plugins. + + +# `HeterogeneousTest/AlpakaOpaque` + +The package `HeterogeneousTest/AlpakaOpaque` implements a non-Alpaka aware library, with functions +that call the wrappers defined in the `HeterogeneousTest/AlpakaWrapper` library: +```c++ +namespace ALPAKA_ACCELERATOR_NAMESPACE::test { + + void opaque_add_vectors_f(...); + void opaque_add_vectors_d(...); + +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::test +``` + +The `plugins` directory implements the `AlpakaTestOpqaueAdditionModule` `EDAnalyzer` that calls the +function defined in this library. This plugin shows how the function can be used directly from a +host-only, non-Alpaka aware plugin. + +The `test` directory implements the `testAlpakaDeviceAdditionOpqaue` test binary that calls the +function defined in this library, and shows how they can be used directly from a host-only, +non-Alpaka aware application. +It also contains the `testAlpakaTestOpqaueAdditionModule.py` python configuration to exercise the +`AlpakaTestOpqaueAdditionModule` module. + + +# Other packages + +For various ways in which this library and plugin can be tested, see also the other +`HeterogeneousTest/Alpaka...` packages: + - [`HeterogeneousTest/AlpakaDevice/README.md`](../../HeterogeneousTest/AlpakaDevice/README.md) + - [`HeterogeneousTest/AlpakaKernel/README.md`](../../HeterogeneousTest/AlpakaKernel/README.md) + - [`HeterogeneousTest/AlpakaWrapper/README.md`](../../HeterogeneousTest/AlpakaWrapper/README.md) + + +# Combining plugins + +`HeterogeneousTest/AlpakaOpaque/test` contains also the `testAlpakaTestAdditionModules.py` python +configuration that exercise all four plugins in a single application. diff --git a/HeterogeneousTest/AlpakaOpaque/interface/alpaka/DeviceAdditionOpaque.h b/HeterogeneousTest/AlpakaOpaque/interface/alpaka/DeviceAdditionOpaque.h new file mode 100644 index 0000000000000..12eb45373ab1c --- /dev/null +++ b/HeterogeneousTest/AlpakaOpaque/interface/alpaka/DeviceAdditionOpaque.h @@ -0,0 +1,16 @@ +#ifndef HeterogeneousTest_AlpakaOpaque_interface_alpaka_DeviceAdditionOpaque_h +#define HeterogeneousTest_AlpakaOpaque_interface_alpaka_DeviceAdditionOpaque_h + +#include + +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE::test { + + void opaque_add_vectors_f(const float* in1, const float* in2, float* out, uint32_t size); + + void opaque_add_vectors_d(const double* in1, const double* in2, double* out, uint32_t size); + +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::test + +#endif // HeterogeneousTest_AlpakaOpaque_interface_alpaka_DeviceAdditionOpaque_h diff --git a/HeterogeneousTest/AlpakaOpaque/plugins/BuildFile.xml b/HeterogeneousTest/AlpakaOpaque/plugins/BuildFile.xml new file mode 100644 index 0000000000000..aad45d082d2e3 --- /dev/null +++ b/HeterogeneousTest/AlpakaOpaque/plugins/BuildFile.xml @@ -0,0 +1,10 @@ + + + + + + + + + + diff --git a/HeterogeneousTest/AlpakaOpaque/plugins/alpaka/AlpakaTestOpaqueAdditionModule.cc b/HeterogeneousTest/AlpakaOpaque/plugins/alpaka/AlpakaTestOpaqueAdditionModule.cc new file mode 100644 index 0000000000000..855856c9d7af3 --- /dev/null +++ b/HeterogeneousTest/AlpakaOpaque/plugins/alpaka/AlpakaTestOpaqueAdditionModule.cc @@ -0,0 +1,94 @@ +#include +#include +#include +#include + +#include "FWCore/Framework/interface/Event.h" +#include "FWCore/Framework/interface/Frameworkfwd.h" +#include "FWCore/Framework/interface/global/EDAnalyzer.h" +#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" +#include "FWCore/ServiceRegistry/interface/Service.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "HeterogeneousTest/AlpakaOpaque/interface/alpaka/DeviceAdditionOpaque.h" +#include "HeterogeneousCore/AlpakaServices/interface/alpaka/AlpakaService.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + + class AlpakaTestOpaqueAdditionModule : public edm::global::EDAnalyzer<> { + public: + explicit AlpakaTestOpaqueAdditionModule(edm::ParameterSet const& config); + ~AlpakaTestOpaqueAdditionModule() override = default; + + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); + + void analyze(edm::StreamID, edm::Event const& event, edm::EventSetup const& setup) const override; + + private: + const uint32_t size_; + }; + + AlpakaTestOpaqueAdditionModule::AlpakaTestOpaqueAdditionModule(edm::ParameterSet const& config) + : size_(config.getParameter("size")) {} + + void AlpakaTestOpaqueAdditionModule::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + desc.add("size", 1024 * 1024); + + // ignore the alpaka = cms.untracked.PSet(...) injected by the framework + edm::ParameterSetDescription alpaka; + alpaka.setAllowAnything(); + desc.addUntracked("alpaka", alpaka); + + descriptions.addWithDefaultLabel(desc); + } + + void AlpakaTestOpaqueAdditionModule::analyze(edm::StreamID, + edm::Event const& event, + edm::EventSetup const& setup) const { + // require a valid Alpaka backend for running + edm::Service service; + if (not service or not service->enabled()) { + std::cout << "The " << ALPAKA_TYPE_ALIAS_NAME(AlpakaService) + << " is not available or disabled, the test will be skipped.\n"; + return; + } + + // random number generator with a gaussian distribution + std::random_device rd{}; + std::default_random_engine rand{rd()}; + std::normal_distribution dist{0., 1.}; + + // tolerance + constexpr float epsilon = 0.000001; + + // allocate input and output host buffers + std::vector in1(size_); + std::vector in2(size_); + std::vector out(size_); + + // fill the input buffers with random data, and the output buffer with zeros + for (uint32_t i = 0; i < size_; ++i) { + in1[i] = dist(rand); + in2[i] = dist(rand); + out[i] = 0.; + } + + // launch the 1-dimensional kernel for vector addition on the first available device + test::opaque_add_vectors_f(in1.data(), in2.data(), out.data(), size_); + + // check the results + for (uint32_t i = 0; i < size_; ++i) { + float sum = in1[i] + in2[i]; + assert(out[i] < sum + epsilon); + assert(out[i] > sum - epsilon); + } + + std::cout << "All tests passed.\n"; + } + +} // namespace ALPAKA_ACCELERATOR_NAMESPACE + +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/MakerMacros.h" +DEFINE_FWK_ALPAKA_MODULE(AlpakaTestOpaqueAdditionModule); diff --git a/HeterogeneousTest/AlpakaOpaque/src/alpaka/DeviceAdditionOpaque.cc b/HeterogeneousTest/AlpakaOpaque/src/alpaka/DeviceAdditionOpaque.cc new file mode 100644 index 0000000000000..34c3370b677c9 --- /dev/null +++ b/HeterogeneousTest/AlpakaOpaque/src/alpaka/DeviceAdditionOpaque.cc @@ -0,0 +1,85 @@ +#include + +#include + +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "HeterogeneousCore/AlpakaInterface/interface/devices.h" +#include "HeterogeneousCore/AlpakaInterface/interface/memory.h" +#include "HeterogeneousTest/AlpakaOpaque/interface/alpaka/DeviceAdditionOpaque.h" +#include "HeterogeneousTest/AlpakaWrapper/interface/alpaka/DeviceAdditionWrapper.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE::test { + + void opaque_add_vectors_f(const float* in1, const float* in2, float* out, uint32_t size) { + // run on the first available devices + auto const& device = cms::alpakatools::devices()[0]; + Queue queue{device}; + + // wrap the input and output data in views + auto in1_h = cms::alpakatools::make_host_view(in1, size); + auto in2_h = cms::alpakatools::make_host_view(in2, size); + auto out_h = cms::alpakatools::make_host_view(out, size); + + // allocate input and output buffers on the device + auto in1_d = cms::alpakatools::make_device_buffer(queue, size); + auto in2_d = cms::alpakatools::make_device_buffer(queue, size); + auto out_d = cms::alpakatools::make_device_buffer(queue, size); + + // copy the input data to the device + // FIXME: pass the explicit size of type uint32_t to avoid compilation error + // The destination view and the extent are required to have compatible index types! + alpaka::memcpy(queue, in1_d, in1_h, size); + alpaka::memcpy(queue, in2_d, in2_h, size); + + // fill the output buffer with zeros + alpaka::memset(queue, out_d, 0); + + // launch the 1-dimensional kernel for vector addition + test::wrapper_add_vectors_f(queue, in1_d.data(), in2_d.data(), out_d.data(), size); + + // copy the results from the device to the host + alpaka::memcpy(queue, out_h, out_d); + + // wait for all the operations to complete + alpaka::wait(queue); + + // the device buffers are freed automatically + } + + void opaque_add_vectors_d(const double* in1, const double* in2, double* out, uint32_t size) { + // run on the first available devices + auto const& device = cms::alpakatools::devices()[0]; + Queue queue{device}; + + // wrap the input and output data in views + auto in1_h = cms::alpakatools::make_host_view(in1, size); + auto in2_h = cms::alpakatools::make_host_view(in2, size); + auto out_h = cms::alpakatools::make_host_view(out, size); + + // allocate input and output buffers on the device + auto in1_d = cms::alpakatools::make_device_buffer(queue, size); + auto in2_d = cms::alpakatools::make_device_buffer(queue, size); + auto out_d = cms::alpakatools::make_device_buffer(queue, size); + + // copy the input data to the device + // FIXME: pass the explicit size of type uint32_t to avoid compilation error + // The destination view and the extent are required to have compatible index types! + alpaka::memcpy(queue, in1_d, in1_h, size); + alpaka::memcpy(queue, in2_d, in2_h, size); + + // fill the output buffer with zeros + alpaka::memset(queue, out_d, 0); + + // launch the 1-dimensional kernel for vector addition + test::wrapper_add_vectors_d(queue, in1_d.data(), in2_d.data(), out_d.data(), size); + + // copy the results from the device to the host + alpaka::memcpy(queue, out_h, out_d); + + // wait for all the operations to complete + alpaka::wait(queue); + + // the device buffers are freed automatically + } + +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::test diff --git a/HeterogeneousTest/AlpakaOpaque/test/BuildFile.xml b/HeterogeneousTest/AlpakaOpaque/test/BuildFile.xml new file mode 100644 index 0000000000000..51b598e396e9a --- /dev/null +++ b/HeterogeneousTest/AlpakaOpaque/test/BuildFile.xml @@ -0,0 +1,10 @@ + + + + + + + + + + diff --git a/HeterogeneousTest/AlpakaOpaque/test/alpaka/testDeviceAdditionOpaque.cc b/HeterogeneousTest/AlpakaOpaque/test/alpaka/testDeviceAdditionOpaque.cc new file mode 100644 index 0000000000000..aba728bd26218 --- /dev/null +++ b/HeterogeneousTest/AlpakaOpaque/test/alpaka/testDeviceAdditionOpaque.cc @@ -0,0 +1,54 @@ +#include +#include +#include + +#define CATCH_CONFIG_MAIN +#include + +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "HeterogeneousCore/AlpakaInterface/interface/devices.h" +#include "HeterogeneousTest/AlpakaOpaque/interface/alpaka/DeviceAdditionOpaque.h" + +using namespace ALPAKA_ACCELERATOR_NAMESPACE; + +TEST_CASE("HeterogeneousTest/AlpakaOpaque test", "[alpakaTestOpaqueAdditionOpaque]") { + auto const& devices = cms::alpakatools::devices(); + if (devices.empty()) { + FAIL("No devices available for the " EDM_STRINGIZE(ALPAKA_ACCELERATOR_NAMESPACE) " backend, " + "the test will be skipped."); + } + + // random number generator with a gaussian distribution + std::random_device rd{}; + std::default_random_engine rand{rd()}; + std::normal_distribution dist{0., 1.}; + + // tolerance + constexpr float epsilon = 0.000001; + + // buffer size + constexpr uint32_t size = 1024 * 1024; + + // allocate input and output host buffers + std::vector in1(size); + std::vector in2(size); + std::vector out(size); + + // fill the input buffers with random data, and the output buffer with zeros + for (uint32_t i = 0; i < size; ++i) { + in1[i] = dist(rand); + in2[i] = dist(rand); + out[i] = 0.; + } + + SECTION("Test add_vectors_f on " EDM_STRINGIZE(ALPAKA_ACCELERATOR_NAMESPACE) " backend") { + // launch the 1-dimensional kernel for vector addition + REQUIRE_NOTHROW(test::opaque_add_vectors_f(in1.data(), in2.data(), out.data(), size)); + + // check the results + for (uint32_t i = 0; i < size; ++i) { + float sum = in1[i] + in2[i]; + CHECK_THAT(out[i], Catch::Matchers::WithinAbs(sum, epsilon)); + } + } +} diff --git a/HeterogeneousTest/AlpakaOpaque/test/testAlpakaTestAdditionModules.py b/HeterogeneousTest/AlpakaOpaque/test/testAlpakaTestAdditionModules.py new file mode 100644 index 0000000000000..e1a2e44448a6c --- /dev/null +++ b/HeterogeneousTest/AlpakaOpaque/test/testAlpakaTestAdditionModules.py @@ -0,0 +1,31 @@ +import FWCore.ParameterSet.Config as cms + +process = cms.Process('TestAlpakaTestOpaqueAdditionModule') +process.load('Configuration.StandardSequences.Accelerators_cff') +process.load('HeterogeneousCore.AlpakaCore.ProcessAcceleratorAlpaka_cfi') + +process.source = cms.Source('EmptySource') + +process.alpakaTestDeviceAdditionModule = cms.EDAnalyzer('AlpakaTestDeviceAdditionModule@alpaka', + size = cms.uint32( 1024*1024 ) +) + +process.alpakaTestKernelAdditionModule = cms.EDAnalyzer('AlpakaTestKernelAdditionModule@alpaka', + size = cms.uint32( 1024*1024 ) +) + +process.alpakaTestWrapperAdditionModule = cms.EDAnalyzer('AlpakaTestWrapperAdditionModule@alpaka', + size = cms.uint32( 1024*1024 ) +) + +process.alpakaTestOpaqueAdditionModule = cms.EDAnalyzer('AlpakaTestOpaqueAdditionModule@alpaka', + size = cms.uint32( 1024*1024 ) +) + +process.path = cms.Path( + process.alpakaTestDeviceAdditionModule + + process.alpakaTestKernelAdditionModule + + process.alpakaTestWrapperAdditionModule + + process.alpakaTestOpaqueAdditionModule) + +process.maxEvents.input = 1 diff --git a/HeterogeneousTest/AlpakaOpaque/test/testAlpakaTestOpaqueAdditionModule.py b/HeterogeneousTest/AlpakaOpaque/test/testAlpakaTestOpaqueAdditionModule.py new file mode 100644 index 0000000000000..a23a22b8389ec --- /dev/null +++ b/HeterogeneousTest/AlpakaOpaque/test/testAlpakaTestOpaqueAdditionModule.py @@ -0,0 +1,15 @@ +import FWCore.ParameterSet.Config as cms + +process = cms.Process('TestAlpakaTestOpaqueAdditionModule') +process.load('Configuration.StandardSequences.Accelerators_cff') +process.load('HeterogeneousCore.AlpakaCore.ProcessAcceleratorAlpaka_cfi') + +process.source = cms.Source('EmptySource') + +process.alpakaTestOpaqueAdditionModule = cms.EDAnalyzer('AlpakaTestOpaqueAdditionModule@alpaka', + size = cms.uint32( 1024*1024 ) +) + +process.path = cms.Path(process.alpakaTestOpaqueAdditionModule) + +process.maxEvents.input = 1 diff --git a/HeterogeneousTest/AlpakaWrapper/BuildFile.xml b/HeterogeneousTest/AlpakaWrapper/BuildFile.xml new file mode 100644 index 0000000000000..0418fc9ec2c38 --- /dev/null +++ b/HeterogeneousTest/AlpakaWrapper/BuildFile.xml @@ -0,0 +1,7 @@ + + + + + + + diff --git a/HeterogeneousTest/AlpakaWrapper/README.md b/HeterogeneousTest/AlpakaWrapper/README.md new file mode 100644 index 0000000000000..e9aef644c554a --- /dev/null +++ b/HeterogeneousTest/AlpakaWrapper/README.md @@ -0,0 +1,48 @@ +# Introduction + +The packages `HeterogeneousTest/AlpakaDevice`, `HeterogeneousTest/AlpakaKernel`, +`HeterogeneousTest/AlpakaWrapper` and `HeterogeneousTest/AlpakaOpaque` implement a set of libraries, +plugins and tests to exercise the build rules for Alpaka. +In particular, these tests show what is supported and what are the limitations implementing +Alpaka-based libraries, and using them from multiple plugins. + + +# `HeterogeneousTest/AlpakaWrapper` + +The package `HeterogeneousTest/AlpakaWrapper` implements a library that defines and exports +host-side wrappers that launch the kernels defined in the `HeterogeneousTest/AlpakaKernel` library: +```c++ +namespace ALPAKA_ACCELERATOR_NAMESPACE::test { + + void wrapper_add_vectors_f(...); + void wrapper_add_vectors_d(...); + +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::test +``` +These wrappers can be used from host-only, non-Alpaka aware libraries, plugins and applications. +They can be linked with the standard host linker. + +The `plugins` directory implements the `AlpakaTestWrapperAdditionModule` `EDAnalyzer` that calls the +wrappers defined in this library. This plugin shows how the wrappers can be used directly from a +host-only, non-Alpaka aware plugin. + +The `test` directory implements the `testAlpakaDeviceAdditionWrapper` test binary that calls the +wrappers defined in this library, and shows how they can be used directly from a host-only, +non-Alpaka aware application. +It also contains the `testAlpakaTestWrapperAdditionModule.py` python configuration to exercise the +`AlpakaTestWrapperAdditionModule` module. + + +# Other packages + +For various ways in which this library and plugin can be tested, see also the other +`HeterogeneousTest/Alpaka...` packages: + - [`HeterogeneousTest/AlpakaDevice/README.md`](../../HeterogeneousTest/AlpakaDevice/README.md) + - [`HeterogeneousTest/AlpakaKernel/README.md`](../../HeterogeneousTest/AlpakaKernel/README.md) + - [`HeterogeneousTest/AlpakaOpaque/README.md`](../../HeterogeneousTest/AlpakaOpaque/README.md) + + +# Combining plugins + +`HeterogeneousTest/AlpakaOpaque/test` contains the `testAlpakaTestAdditionModules.py` python +configuration that exercise all four plugins in a single application. diff --git a/HeterogeneousTest/AlpakaWrapper/interface/alpaka/DeviceAdditionWrapper.h b/HeterogeneousTest/AlpakaWrapper/interface/alpaka/DeviceAdditionWrapper.h new file mode 100644 index 0000000000000..a278911aebfd4 --- /dev/null +++ b/HeterogeneousTest/AlpakaWrapper/interface/alpaka/DeviceAdditionWrapper.h @@ -0,0 +1,24 @@ +#ifndef HeterogeneousTest_AlpakaWrapper_interface_alpaka_DeviceAdditionWrapper_h +#define HeterogeneousTest_AlpakaWrapper_interface_alpaka_DeviceAdditionWrapper_h + +#include + +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE::test { + + void wrapper_add_vectors_f(Queue& queue, + const float* __restrict__ in1, + const float* __restrict__ in2, + float* __restrict__ out, + uint32_t size); + + void wrapper_add_vectors_d(Queue& queue, + const double* __restrict__ in1, + const double* __restrict__ in2, + double* __restrict__ out, + uint32_t size); + +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::test + +#endif // HeterogeneousTest_AlpakaWrapper_interface_alpaka_DeviceAdditionWrapper_h diff --git a/HeterogeneousTest/AlpakaWrapper/plugins/BuildFile.xml b/HeterogeneousTest/AlpakaWrapper/plugins/BuildFile.xml new file mode 100644 index 0000000000000..3ebdae5ffd581 --- /dev/null +++ b/HeterogeneousTest/AlpakaWrapper/plugins/BuildFile.xml @@ -0,0 +1,11 @@ + + + + + + + + + + + diff --git a/HeterogeneousTest/AlpakaWrapper/plugins/alpaka/AlpakaTestWrapperAdditionModule.cc b/HeterogeneousTest/AlpakaWrapper/plugins/alpaka/AlpakaTestWrapperAdditionModule.cc new file mode 100644 index 0000000000000..1a33e51c23348 --- /dev/null +++ b/HeterogeneousTest/AlpakaWrapper/plugins/alpaka/AlpakaTestWrapperAdditionModule.cc @@ -0,0 +1,122 @@ +#include +#include +#include +#include + +#include + +#include "FWCore/Framework/interface/Event.h" +#include "FWCore/Framework/interface/Frameworkfwd.h" +#include "FWCore/Framework/interface/global/EDAnalyzer.h" +#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" +#include "FWCore/ServiceRegistry/interface/Service.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "HeterogeneousCore/AlpakaInterface/interface/memory.h" +#include "HeterogeneousCore/AlpakaServices/interface/alpaka/AlpakaService.h" +#include "HeterogeneousTest/AlpakaWrapper/interface/alpaka/DeviceAdditionWrapper.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + + class AlpakaTestWrapperAdditionModule : public edm::global::EDAnalyzer<> { + public: + explicit AlpakaTestWrapperAdditionModule(edm::ParameterSet const& config); + ~AlpakaTestWrapperAdditionModule() override = default; + + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); + + void analyze(edm::StreamID, edm::Event const& event, edm::EventSetup const& setup) const override; + + private: + const uint32_t size_; + }; + + AlpakaTestWrapperAdditionModule::AlpakaTestWrapperAdditionModule(edm::ParameterSet const& config) + : size_(config.getParameter("size")) {} + + void AlpakaTestWrapperAdditionModule::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + desc.add("size", 1024 * 1024); + + // ignore the alpaka = cms.untracked.PSet(...) injected by the framework + edm::ParameterSetDescription alpaka; + alpaka.setAllowAnything(); + desc.addUntracked("alpaka", alpaka); + + descriptions.addWithDefaultLabel(desc); + } + + void AlpakaTestWrapperAdditionModule::analyze(edm::StreamID, + edm::Event const& event, + edm::EventSetup const& setup) const { + // require a valid Alpaka backend for running + edm::Service service; + if (not service or not service->enabled()) { + std::cout << "The " << ALPAKA_TYPE_ALIAS_NAME(AlpakaService) + << " is not available or disabled, the test will be skipped.\n"; + return; + } + + // random number generator with a gaussian distribution + std::random_device rd{}; + std::default_random_engine rand{rd()}; + std::normal_distribution dist{0., 1.}; + + // tolerance + constexpr float epsilon = 0.000001; + + // allocate input and output host buffers + std::vector in1_h(size_); + std::vector in2_h(size_); + std::vector out_h(size_); + + // fill the input buffers with random data, and the output buffer with zeros + for (uint32_t i = 0; i < size_; ++i) { + in1_h[i] = dist(rand); + in2_h[i] = dist(rand); + out_h[i] = 0.; + } + + // run the test on all available devices + for (auto const& device : cms::alpakatools::devices()) { + Queue queue{device}; + + // allocate input and output buffers on the device + auto in1_d = cms::alpakatools::make_device_buffer(queue, size_); + auto in2_d = cms::alpakatools::make_device_buffer(queue, size_); + auto out_d = cms::alpakatools::make_device_buffer(queue, size_); + + // copy the input data to the device + // FIXME: pass the explicit size of type uint32_t to avoid compilation error + // The destination view and the extent are required to have compatible index types! + alpaka::memcpy(queue, in1_d, in1_h, size_); + alpaka::memcpy(queue, in2_d, in2_h, size_); + + // fill the output buffer with zeros + alpaka::memset(queue, out_d, 0); + + // launch the 1-dimensional kernel for vector addition + test::wrapper_add_vectors_f(queue, in1_d.data(), in2_d.data(), out_d.data(), size_); + + // copy the results from the device to the host + alpaka::memcpy(queue, out_h, out_d); + + // wait for all the operations to complete + alpaka::wait(queue); + + // check the results + for (uint32_t i = 0; i < size_; ++i) { + float sum = in1_h[i] + in2_h[i]; + assert(out_h[i] < sum + epsilon); + assert(out_h[i] > sum - epsilon); + } + } + + std::cout << "All tests passed.\n"; + } + +} // namespace ALPAKA_ACCELERATOR_NAMESPACE + +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/MakerMacros.h" +DEFINE_FWK_ALPAKA_MODULE(AlpakaTestWrapperAdditionModule); diff --git a/HeterogeneousTest/AlpakaWrapper/src/alpaka/DeviceAdditionWrapper.dev.cc b/HeterogeneousTest/AlpakaWrapper/src/alpaka/DeviceAdditionWrapper.dev.cc new file mode 100644 index 0000000000000..b5f662fc930c9 --- /dev/null +++ b/HeterogeneousTest/AlpakaWrapper/src/alpaka/DeviceAdditionWrapper.dev.cc @@ -0,0 +1,30 @@ +#include + +#include + +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h" +#include "HeterogeneousTest/AlpakaKernel/interface/alpaka/DeviceAdditionKernel.h" +#include "HeterogeneousTest/AlpakaWrapper/interface/alpaka/DeviceAdditionWrapper.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE::test { + + void wrapper_add_vectors_f(Queue& queue, + const float* __restrict__ in1, + const float* __restrict__ in2, + float* __restrict__ out, + uint32_t size) { + alpaka::exec( + queue, cms::alpakatools::make_workdiv(32, 32), cms::alpakatest::KernelAddVectorsF{}, in1, in2, out, size); + } + + void wrapper_add_vectors_d(Queue& queue, + const double* __restrict__ in1, + const double* __restrict__ in2, + double* __restrict__ out, + uint32_t size) { + alpaka::exec( + queue, cms::alpakatools::make_workdiv(32, 32), cms::alpakatest::KernelAddVectorsD{}, in1, in2, out, size); + } + +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::test diff --git a/HeterogeneousTest/AlpakaWrapper/test/BuildFile.xml b/HeterogeneousTest/AlpakaWrapper/test/BuildFile.xml new file mode 100644 index 0000000000000..d1bbeb51e4ef5 --- /dev/null +++ b/HeterogeneousTest/AlpakaWrapper/test/BuildFile.xml @@ -0,0 +1,9 @@ + + + + + + + + + diff --git a/HeterogeneousTest/AlpakaWrapper/test/alpaka/testDeviceAdditionWrapper.cc b/HeterogeneousTest/AlpakaWrapper/test/alpaka/testDeviceAdditionWrapper.cc new file mode 100644 index 0000000000000..00e2776f64948 --- /dev/null +++ b/HeterogeneousTest/AlpakaWrapper/test/alpaka/testDeviceAdditionWrapper.cc @@ -0,0 +1,85 @@ +#include +#include +#include + +#define CATCH_CONFIG_MAIN +#include + +#include + +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "HeterogeneousCore/AlpakaInterface/interface/devices.h" +#include "HeterogeneousCore/AlpakaInterface/interface/memory.h" +#include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h" +#include "HeterogeneousTest/AlpakaWrapper/interface/alpaka/DeviceAdditionWrapper.h" + +using namespace ALPAKA_ACCELERATOR_NAMESPACE; + +TEST_CASE("HeterogeneousTest/AlpakaWrapper test", "[alpakaTestDeviceAdditionWrapper]") { + auto const& devices = cms::alpakatools::devices(); + if (devices.empty()) { + FAIL("No devices available for the " EDM_STRINGIZE(ALPAKA_ACCELERATOR_NAMESPACE) " backend, " + "the test will be skipped."); + } + + // random number generator with a gaussian distribution + std::random_device rd{}; + std::default_random_engine rand{rd()}; + std::normal_distribution dist{0., 1.}; + + // tolerance + constexpr float epsilon = 0.000001; + + // buffer size + constexpr uint32_t size = 1024 * 1024; + + // allocate input and output host buffers + std::vector in1_h(size); + std::vector in2_h(size); + std::vector out_h(size); + + // fill the input buffers with random data, and the output buffer with zeros + for (uint32_t i = 0; i < size; ++i) { + in1_h[i] = dist(rand); + in2_h[i] = dist(rand); + out_h[i] = 0.; + } + + // run the test on all available devices + for (auto const& device : cms::alpakatools::devices()) { + SECTION("Test add_vectors_f on " EDM_STRINGIZE(ALPAKA_ACCELERATOR_NAMESPACE) " backend") { + REQUIRE_NOTHROW([&]() { + Queue queue{device}; + + // allocate input and output buffers on the device + auto in1_d = cms::alpakatools::make_device_buffer(queue, size); + auto in2_d = cms::alpakatools::make_device_buffer(queue, size); + auto out_d = cms::alpakatools::make_device_buffer(queue, size); + + // copy the input data to the device + // FIXME: pass the explicit size of type uint32_t to avoid compilation error + // The destination view and the extent are required to have compatible index types! + alpaka::memcpy(queue, in1_d, in1_h, size); + alpaka::memcpy(queue, in2_d, in2_h, size); + + // fill the output buffer with zeros + alpaka::memset(queue, out_d, 0); + + // launch the 1-dimensional kernel for vector addition + test::wrapper_add_vectors_f(queue, in1_d.data(), in2_d.data(), out_d.data(), size); + + // copy the results from the device to the host + alpaka::memcpy(queue, out_h, out_d, size); + + // wait for all the operations to complete + alpaka::wait(queue); + }()); + + // check the results + for (uint32_t i = 0; i < size; ++i) { + float sum = in1_h[i] + in2_h[i]; + CHECK_THAT(out_h[i], Catch::Matchers::WithinAbs(sum, epsilon)); + } + } + } +} diff --git a/HeterogeneousTest/AlpakaWrapper/test/testAlpakaTestWrapperAdditionModule.py b/HeterogeneousTest/AlpakaWrapper/test/testAlpakaTestWrapperAdditionModule.py new file mode 100644 index 0000000000000..7de7cdf1f2451 --- /dev/null +++ b/HeterogeneousTest/AlpakaWrapper/test/testAlpakaTestWrapperAdditionModule.py @@ -0,0 +1,15 @@ +import FWCore.ParameterSet.Config as cms + +process = cms.Process('TestAlpakaTestWrapperAdditionModule') +process.load('Configuration.StandardSequences.Accelerators_cff') +process.load('HeterogeneousCore.AlpakaCore.ProcessAcceleratorAlpaka_cfi') + +process.source = cms.Source('EmptySource') + +process.alpakaTestWrapperAdditionModule = cms.EDAnalyzer('AlpakaTestWrapperAdditionModule@alpaka', + size = cms.uint32( 1024*1024 ) +) + +process.path = cms.Path(process.alpakaTestWrapperAdditionModule) + +process.maxEvents.input = 1