Skip to content

Commit

Permalink
Add a test for ROCm libraries and build rules
Browse files Browse the repository at this point in the history
The package HeterogeneousTest/ROCmDevice implements a library that
defines and exports ROCm device-only functions, and a plugin and test
that use them.

The package HeterogeneousTest/ROCmKernel implements a library that
imports device functions from HeterogeneousTest/ROCmDevice to define
and export ROCm kernels, and a plugin and test that use them.

The package HeterogeneousTest/ROCmWrapper implements a library that
imports kernels from HeterogeneousTest/ROCmKernel to define and
export host-only wrappers around them, usable by non-ROCm libraries,
plugins and applications, and implements a plugin and test that use them.

The package HeterogeneousTest/ROCmOpaque implements a library that
imports kernels from HeterogeneousTest/ROCmKernel to define and
export host-only wrappers around the whole ROCm section, usable by
libraries, plugins and applications that are not ROCm-aware, and
implements a plugin and test that use them.
  • Loading branch information
fwyzard committed Jan 31, 2023
1 parent 35cd7fc commit 85e936f
Show file tree
Hide file tree
Showing 37 changed files with 1,197 additions and 0 deletions.
6 changes: 6 additions & 0 deletions HeterogeneousTest/ROCmDevice/BuildFile.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
<iftool name="rocm">
<use name="rocm"/>
<export>
<lib name="1"/>
</export>
</iftool>
22 changes: 22 additions & 0 deletions HeterogeneousTest/ROCmDevice/interface/DeviceAddition.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#ifndef HeterogeneousTest_ROCmDevice_interface_DeviceAddition_h
#define HeterogeneousTest_ROCmDevice_interface_DeviceAddition_h

#include <cstddef>

#include <hip/hip_runtime.h>

namespace cms::rocmtest {

__device__ void add_vectors_f(const float* __restrict__ in1,
const float* __restrict__ in2,
float* __restrict__ out,
size_t size);

__device__ void add_vectors_d(const double* __restrict__ in1,
const double* __restrict__ in2,
double* __restrict__ out,
size_t size);

} // namespace cms::rocmtest

#endif // HeterogeneousTest_ROCmDevice_interface_DeviceAddition_h
12 changes: 12 additions & 0 deletions HeterogeneousTest/ROCmDevice/plugins/BuildFile.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
<iftool name="rocm">
<library file="*.cc *.hip.cc" name="HeterogeneousTestROCmDevicePlugins">
<use name="rocm"/>
<use name="FWCore/Framework"/>
<use name="FWCore/ParameterSet"/>
<use name="FWCore/ServiceRegistry"/>
<use name="HeterogeneousCore/ROCmServices"/>
<use name="HeterogeneousCore/ROCmUtilities"/>
<use name="HeterogeneousTest/ROCmDevice"/>
<flags EDM_PLUGIN="1"/>
</library>
</iftool>
15 changes: 15 additions & 0 deletions HeterogeneousTest/ROCmDevice/plugins/ROCmTestDeviceAdditionAlgo.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#ifndef HeterogeneousTest_ROCmDevice_plugins_ROCmTestDeviceAdditionAlgo_h
#define HeterogeneousTest_ROCmDevice_plugins_ROCmTestDeviceAdditionAlgo_h

#include <cstddef>

namespace HeterogeneousCoreROCmTestDevicePlugins {

void wrapper_add_vectors_f(const float* __restrict__ in1,
const float* __restrict__ in2,
float* __restrict__ out,
size_t size);

} // namespace HeterogeneousCoreROCmTestDevicePlugins

#endif // HeterogeneousTest_ROCmDevice_plugins_ROCmTestDeviceAdditionAlgo_h
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
#include <cstddef>

#include <hip/hip_runtime.h>

#include "HeterogeneousTest/ROCmDevice/interface/DeviceAddition.h"
#include "HeterogeneousCore/ROCmUtilities/interface/hipCheck.h"

#include "ROCmTestDeviceAdditionAlgo.h"

namespace HeterogeneousCoreROCmTestDevicePlugins {

__global__ void kernel_add_vectors_f(const float* __restrict__ in1,
const float* __restrict__ in2,
float* __restrict__ out,
size_t size) {
cms::rocmtest::add_vectors_f(in1, in2, out, size);
}

void wrapper_add_vectors_f(const float* __restrict__ in1,
const float* __restrict__ in2,
float* __restrict__ out,
size_t size) {
kernel_add_vectors_f<<<32, 32>>>(in1, in2, out, size);
hipCheck(hipGetLastError());
}

} // namespace HeterogeneousCoreROCmTestDevicePlugins
106 changes: 106 additions & 0 deletions HeterogeneousTest/ROCmDevice/plugins/ROCmTestDeviceAdditionModule.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <random>
#include <vector>

#include <hip/hip_runtime.h>

#include "FWCore/Framework/interface/Event.h"
#include "FWCore/Framework/interface/Frameworkfwd.h"
#include "FWCore/Framework/interface/global/EDAnalyzer.h"
#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h"
#include "FWCore/ParameterSet/interface/ParameterSet.h"
#include "FWCore/ParameterSet/interface/ParameterSetDescription.h"
#include "FWCore/ServiceRegistry/interface/Service.h"
#include "HeterogeneousCore/ROCmServices/interface/ROCmService.h"
#include "HeterogeneousCore/ROCmUtilities/interface/hipCheck.h"

#include "ROCmTestDeviceAdditionAlgo.h"

class ROCmTestDeviceAdditionModule : public edm::global::EDAnalyzer<> {
public:
explicit ROCmTestDeviceAdditionModule(edm::ParameterSet const& config);
~ROCmTestDeviceAdditionModule() override = default;

static void fillDescriptions(edm::ConfigurationDescriptions& descriptions);

void analyze(edm::StreamID, edm::Event const& event, edm::EventSetup const& setup) const override;

private:
const uint32_t size_;
};

ROCmTestDeviceAdditionModule::ROCmTestDeviceAdditionModule(edm::ParameterSet const& config)
: size_(config.getParameter<uint32_t>("size")) {}

void ROCmTestDeviceAdditionModule::fillDescriptions(edm::ConfigurationDescriptions& descriptions) {
edm::ParameterSetDescription desc;
desc.add<uint32_t>("size", 1024 * 1024);
descriptions.addWithDefaultLabel(desc);
}

void ROCmTestDeviceAdditionModule::analyze(edm::StreamID, edm::Event const& event, edm::EventSetup const& setup) const {
// require ROCm for running
edm::Service<ROCmService> cs;
if (not cs->enabled()) {
std::cout << "The ROCmService is disabled, the test will be skipped.\n";
return;
}

// random number generator with a gaussian distribution
std::random_device rd{};
std::default_random_engine rand{rd()};
std::normal_distribution<float> dist{0., 1.};

// tolerance
constexpr float epsilon = 0.000001;

// allocate input and output host buffers
std::vector<float> in1_h(size_);
std::vector<float> in2_h(size_);
std::vector<float> out_h(size_);

// fill the input buffers with random data, and the output buffer with zeros
for (size_t i = 0; i < size_; ++i) {
in1_h[i] = dist(rand);
in2_h[i] = dist(rand);
out_h[i] = 0.;
}

// allocate input and output buffers on the device
float* in1_d;
float* in2_d;
float* out_d;
hipCheck(hipMalloc(&in1_d, size_ * sizeof(float)));
hipCheck(hipMalloc(&in2_d, size_ * sizeof(float)));
hipCheck(hipMalloc(&out_d, size_ * sizeof(float)));

// copy the input data to the device
hipCheck(hipMemcpy(in1_d, in1_h.data(), size_ * sizeof(float), hipMemcpyHostToDevice));
hipCheck(hipMemcpy(in2_d, in2_h.data(), size_ * sizeof(float), hipMemcpyHostToDevice));

// fill the output buffer with zeros
hipCheck(hipMemset(out_d, 0, size_ * sizeof(float)));

// launch the 1-dimensional kernel for vector addition
HeterogeneousCoreROCmTestDevicePlugins::wrapper_add_vectors_f(in1_d, in2_d, out_d, size_);

// copy the results from the device to the host
hipCheck(hipMemcpy(out_h.data(), out_d, size_ * sizeof(float), hipMemcpyDeviceToHost));

// wait for all the operations to complete
hipCheck(hipDeviceSynchronize());

// check the results
for (size_t i = 0; i < size_; ++i) {
float sum = in1_h[i] + in2_h[i];
assert(out_h[i] < sum + epsilon);
assert(out_h[i] > sum - epsilon);
}

std::cout << "All tests passed.\n";
}

#include "FWCore/Framework/interface/MakerMacros.h"
DEFINE_FWK_MODULE(ROCmTestDeviceAdditionModule);
34 changes: 34 additions & 0 deletions HeterogeneousTest/ROCmDevice/src/DeviceAddition.hip.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
#include <cstddef>
#include <cstdint>

#include <hip/hip_runtime.h>

#include "HeterogeneousTest/ROCmDevice/interface/DeviceAddition.h"

namespace cms::rocmtest {

__device__ void add_vectors_f(const float* __restrict__ in1,
const float* __restrict__ in2,
float* __restrict__ out,
size_t size) {
uint32_t thread = threadIdx.x + blockIdx.x * blockDim.x;
uint32_t stride = blockDim.x * gridDim.x;

for (size_t i = thread; i < size; i += stride) {
out[i] = in1[i] + in2[i];
}
}

__device__ void add_vectors_d(const double* __restrict__ in1,
const double* __restrict__ in2,
double* __restrict__ out,
size_t size) {
uint32_t thread = threadIdx.x + blockIdx.x * blockDim.x;
uint32_t stride = blockDim.x * gridDim.x;

for (size_t i = thread; i < size; i += stride) {
out[i] = in1[i] + in2[i];
}
}

} // namespace cms::rocmtest
10 changes: 10 additions & 0 deletions HeterogeneousTest/ROCmDevice/test/BuildFile.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
<iftool name="rocm">
<bin file="testDeviceAddition.hip.cc" name="testRocmDeviceAddition">
<use name="catch2"/>
<use name="rocm"/>
<use name="HeterogeneousTest/ROCmDevice"/>
<use name="HeterogeneousCore/ROCmUtilities"/>
</bin>

<test name="testROCmTestDeviceAdditionModule" command="cmsRun ${LOCALTOP}/src/HeterogeneousTest/ROCmDevice/test/testROCmTestDeviceAdditionModule.py"/>
</iftool>
80 changes: 80 additions & 0 deletions HeterogeneousTest/ROCmDevice/test/testDeviceAddition.hip.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
#include <cstddef>
#include <cstdint>
#include <random>
#include <vector>

#define CATCH_CONFIG_MAIN
#include <catch.hpp>

#include <hip/hip_runtime.h>

#include "HeterogeneousTest/ROCmDevice/interface/DeviceAddition.h"
#include "HeterogeneousCore/ROCmUtilities/interface/hipCheck.h"
#include "HeterogeneousCore/ROCmUtilities/interface/requireDevices.h"

__global__ void kernel_add_vectors_f(const float* __restrict__ in1,
const float* __restrict__ in2,
float* __restrict__ out,
size_t size) {
cms::rocmtest::add_vectors_f(in1, in2, out, size);
}

TEST_CASE("HeterogeneousTest/ROCmDevice test", "[rocmTestDeviceAddition]") {
cms::rocmtest::requireDevices();

// random number generator with a gaussian distribution
std::random_device rd{};
std::default_random_engine rand{rd()};
std::normal_distribution<float> dist{0., 1.};

// tolerance
constexpr float epsilon = 0.000001;

// buffer size
constexpr size_t size = 1024 * 1024;

// allocate input and output host buffers
std::vector<float> in1_h(size);
std::vector<float> in2_h(size);
std::vector<float> out_h(size);

// fill the input buffers with random data, and the output buffer with zeros
for (size_t i = 0; i < size; ++i) {
in1_h[i] = dist(rand);
in2_h[i] = dist(rand);
out_h[i] = 0.;
}

SECTION("Test add_vectors_f") {
// allocate input and output buffers on the device
float* in1_d;
float* in2_d;
float* out_d;
REQUIRE_NOTHROW(hipCheck(hipMalloc(&in1_d, size * sizeof(float))));
REQUIRE_NOTHROW(hipCheck(hipMalloc(&in2_d, size * sizeof(float))));
REQUIRE_NOTHROW(hipCheck(hipMalloc(&out_d, size * sizeof(float))));

// copy the input data to the device
REQUIRE_NOTHROW(hipCheck(hipMemcpy(in1_d, in1_h.data(), size * sizeof(float), hipMemcpyHostToDevice)));
REQUIRE_NOTHROW(hipCheck(hipMemcpy(in2_d, in2_h.data(), size * sizeof(float), hipMemcpyHostToDevice)));

// fill the output buffer with zeros
REQUIRE_NOTHROW(hipCheck(hipMemset(out_d, 0, size * sizeof(float))));

// launch the 1-dimensional kernel for vector addition
kernel_add_vectors_f<<<32, 32>>>(in1_d, in2_d, out_d, size);
REQUIRE_NOTHROW(hipCheck(hipGetLastError()));

// copy the results from the device to the host
REQUIRE_NOTHROW(hipCheck(hipMemcpy(out_h.data(), out_d, size * sizeof(float), hipMemcpyDeviceToHost)));

// wait for all the operations to complete
REQUIRE_NOTHROW(hipCheck(hipDeviceSynchronize()));

// check the results
for (size_t i = 0; i < size; ++i) {
float sum = in1_h[i] + in2_h[i];
CHECK_THAT(out_h[i], Catch::Matchers::WithinAbs(sum, epsilon));
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
import FWCore.ParameterSet.Config as cms

process = cms.Process('TestROCmTestDeviceAdditionModule')

process.source = cms.Source('EmptySource')

process.ROCmService = cms.Service('ROCmService')

process.rocmTestDeviceAdditionModule = cms.EDAnalyzer('ROCmTestDeviceAdditionModule',
size = cms.uint32( 1024*1024 )
)

process.path = cms.Path(process.rocmTestDeviceAdditionModule)

process.maxEvents.input = 1
7 changes: 7 additions & 0 deletions HeterogeneousTest/ROCmKernel/BuildFile.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
<iftool name="rocm">
<use name="rocm"/>
<use name="HeterogeneousTest/ROCmDevice"/>
<export>
<lib name="1"/>
</export>
</iftool>
22 changes: 22 additions & 0 deletions HeterogeneousTest/ROCmKernel/interface/DeviceAdditionKernel.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#ifndef HeterogeneousTest_ROCmKernel_interface_DeviceAdditionKernel_h
#define HeterogeneousTest_ROCmKernel_interface_DeviceAdditionKernel_h

#include <cstddef>

#include <hip/hip_runtime.h>

namespace cms::rocmtest {

__global__ void kernel_add_vectors_f(const float* __restrict__ in1,
const float* __restrict__ in2,
float* __restrict__ out,
size_t size);

__global__ void kernel_add_vectors_d(const double* __restrict__ in1,
const double* __restrict__ in2,
double* __restrict__ out,
size_t size);

} // namespace cms::rocmtest

#endif // HeterogeneousTest_ROCmKernel_interface_DeviceAdditionKernel_h
12 changes: 12 additions & 0 deletions HeterogeneousTest/ROCmKernel/plugins/BuildFile.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
<iftool name="rocm">
<library file="*.cc *.hip.cc" name="HeterogeneousTestROCmKernelPlugins">
<use name="rocm"/>
<use name="FWCore/Framework"/>
<use name="FWCore/ParameterSet"/>
<use name="FWCore/ServiceRegistry"/>
<use name="HeterogeneousCore/ROCmServices"/>
<use name="HeterogeneousCore/ROCmUtilities"/>
<use name="HeterogeneousTest/ROCmKernel"/>
<flags EDM_PLUGIN="1"/>
</library>
</iftool>
15 changes: 15 additions & 0 deletions HeterogeneousTest/ROCmKernel/plugins/ROCmTestKernelAdditionAlgo.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#ifndef HeterogeneousTest_ROCmKernel_plugins_ROCmTestKernelAdditionAlgo_h
#define HeterogeneousTest_ROCmKernel_plugins_ROCmTestKernelAdditionAlgo_h

#include <cstddef>

namespace HeterogeneousCoreROCmTestKernelPlugins {

void wrapper_add_vectors_f(const float* __restrict__ in1,
const float* __restrict__ in2,
float* __restrict__ out,
size_t size);

} // namespace HeterogeneousCoreROCmTestKernelPlugins

#endif // HeterogeneousTest_ROCmKernel_plugins_ROCmTestKernelAdditionAlgo_h
Loading

0 comments on commit 85e936f

Please sign in to comment.