Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ZFP: Add tests for ZFP+CUDA #2926

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion source/adios2/operator/compress/CompressZFP.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -262,6 +262,7 @@ zfp_stream *CompressZFP::GetZFPStream(const Dims &dimensions, DataType type,
{
zfp_stream *stream = zfp_stream_open(NULL);
zfp_stream_set_execution(stream, ZFP_DEFAULT_EXECUTION_POLICY);
bool isSerial = ZFP_DEFAULT_EXECUTION_POLICY == zfp_exec_serial;

auto itAccuracy = parameters.find("accuracy");
const bool hasAccuracy = itAccuracy != parameters.end();
Expand Down Expand Up @@ -291,13 +292,15 @@ zfp_stream *CompressZFP::GetZFPStream(const Dims &dimensions, DataType type,
else if (backend == "serial")
{
policy = zfp_exec_serial;
isSerial = true;
}

zfp_stream_set_execution(stream, policy);
}

if ((hasAccuracy && hasPrecision) || (hasAccuracy && hasRate) ||
(hasPrecision && hasRate))
(hasPrecision && hasRate) ||
(!hasAccuracy && !hasRate && !hasPrecision && !isSerial))
{
std::ostringstream oss;
oss << std::endl
Expand Down
12 changes: 12 additions & 0 deletions testing/adios2/engine/bp/operations/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,18 @@ if(ADIOS2_HAVE_ZFP)
"XML_CONFIG_DIR=${CMAKE_CURRENT_SOURCE_DIR}"
)
endforeach()

if(ADIOS2_HAVE_CUDA)
enable_language(CUDA)

gtest_add_tests_helper(WriteReadZfpCuda MPI_ALLOW BP Engine.BP. .BP4
WORKING_DIRECTORY ${BP4_DIR} EXTRA_ARGS "BP4"
)
set_source_files_properties(CudaRoutines.cu PROPERTIES LANGUAGE CUDA)
foreach(tgt ${Test.Engine.BP.WriteReadZfpCuda-TARGETS})
target_sources(${tgt} PRIVATE CudaRoutines.cu)
endforeach()
endif()
endif()

if(ADIOS2_HAVE_MGARD)
Expand Down
11 changes: 11 additions & 0 deletions testing/adios2/engine/bp/operations/CudaRoutines.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#include "CudaRoutines.h"

__global__ void __cuda_increment(int offset, float *vec, float val)
{
vec[blockIdx.x + offset] += val;
}

void cuda_increment(int M, int N, int offset, float *vec, float val)
{
__cuda_increment<<<M, N>>>(offset, vec, val);
}
9 changes: 9 additions & 0 deletions testing/adios2/engine/bp/operations/CudaRoutines.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#ifndef __TESTING_ADIOS2_CUDA_ROUTINES_H__
#define __TESTING_ADIOS2_CUDA_ROUTINES_H__

#include <cuda.h>
#include <cuda_runtime.h>

void cuda_increment(int M, int N, int offset, float *vec, float val);

#endif
174 changes: 174 additions & 0 deletions testing/adios2/engine/bp/operations/TestBPWriteReadZfpCuda.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,174 @@
/*
* Distributed under the OSI-approved Apache License, Version 2.0. See
* accompanying file Copyright.txt for details.
*/

#include "CudaRoutines.h"
#include <adios2.h>

#include <algorithm>
#include <array>
#include <gtest/gtest.h>
#include <iostream>
#include <numeric> //std::iota

std::string engineName; // comes from command line

const float EPSILON = std::numeric_limits<float>::epsilon();
const float INCREMENT = 10.0f;

void ZFPRateCUDA(const std::string rate)
{
// Each process would write a 1x8 array and all processes would
// form a mpiSize * Nx 1D array
const std::string fname("BPWRZFP1D_" + rate + ".bp");

// Number of rows
const size_t Nx = 100;

// Number of steps
const size_t NSteps = 1;

int mpiRank = 0, mpiSize = 1;
#if ADIOS2_USE_MPI
MPI_Comm_rank(MPI_COMM_WORLD, &mpiRank);
MPI_Comm_size(MPI_COMM_WORLD, &mpiSize);
#endif

#if ADIOS2_USE_MPI
adios2::ADIOS adios(MPI_COMM_WORLD);
#else
adios2::ADIOS adios;
#endif

const size_t NxTotal = Nx * mpiSize;

// Initialize the simulation data
std::vector<float> r32s(NxTotal, .0f);
std::iota(r32s.begin(), r32s.end(), .0f);

float *gpuSimData = nullptr;
cudaMalloc(&gpuSimData, Nx * sizeof(float));
cudaMemcpy(gpuSimData, ((float *)&r32s[0] + (Nx * mpiRank)),
Nx * sizeof(float), cudaMemcpyHostToDevice);

{
adios2::IO io = adios.DeclareIO("TestIO");

if (!engineName.empty())
{
io.SetEngine(engineName);
}

const adios2::Dims shape{static_cast<size_t>(NxTotal)};
const adios2::Dims start{static_cast<size_t>(Nx * mpiRank)};
const adios2::Dims count{Nx};

auto var_r32 = io.DefineVariable<float>("r32", shape, start, count);

// add operations
adios2::Operator ZFPOp =
adios.DefineOperator("ZFPCompressor", adios2::ops::LossyZFP);

var_r32.AddOperation(ZFPOp, {{adios2::ops::zfp::key::rate, rate}});

adios2::Engine bpWriter = io.Open(fname, adios2::Mode::Write);

for (size_t step = 0; step < NSteps; ++step)
{
// Update values in the simulation data
cuda_increment(Nx, 1, 0, gpuSimData, INCREMENT);

bpWriter.BeginStep();
var_r32.SetMemorySpace(adios2::MemorySpace::CUDA);
bpWriter.Put(var_r32, gpuSimData);
bpWriter.EndStep();
}

bpWriter.Close();
}

#if ADIOS2_USE_MPI
MPI_Barrier(MPI_COMM_WORLD);
#endif

{
adios2::IO io = adios.DeclareIO("ReadIO");

if (!engineName.empty())
{
io.SetEngine(engineName);
}

adios2::Engine bpReader = io.Open(fname, adios2::Mode::Read);

auto var_r32 = io.InquireVariable<float>("r32");
EXPECT_TRUE(var_r32);
ASSERT_EQ(var_r32.ShapeID(), adios2::ShapeID::GlobalArray);
ASSERT_EQ(var_r32.Steps(), NSteps);
ASSERT_EQ(var_r32.Shape()[0], NxTotal);

auto mmR32 = std::minmax_element(r32s.begin(), r32s.end());
EXPECT_EQ(var_r32.Min() - INCREMENT, *mmR32.first);
EXPECT_EQ(var_r32.Max() - INCREMENT, *mmR32.second);

unsigned int t = 0;
for (; bpReader.BeginStep() == adios2::StepStatus::OK; ++t)
{
std::vector<float> r32o(NxTotal);
bpReader.Get(var_r32, r32o);
bpReader.EndStep();

// Remove INCREMENT from each element
std::transform(r32o.begin(), r32o.end(), r32o.begin(),
std::bind(std::minus<float>(), std::placeholders::_1,
INCREMENT));

for (int i = 0; i < NxTotal; i++)
{
char msg[1 << 8] = {0};
sprintf(msg, "t=%d i=%d rank=%d r32o=%f r32s=%f", t, i, mpiRank,
r32o[i], r32s[i]);
ASSERT_LT(std::abs(r32o[i] - r32s[i]), EPSILON) << msg;
}
}
EXPECT_EQ(t, NSteps);

bpReader.Close();
}
}

class BPWRZFPCUDA : public ::testing::TestWithParam<std::string>
{
public:
BPWRZFPCUDA() = default;

virtual void SetUp() {}
virtual void TearDown() {}
};

TEST_P(BPWRZFPCUDA, ADIOS2BPWRZFPCUDA) { ZFPRateCUDA(GetParam()); }

INSTANTIATE_TEST_SUITE_P(ZFPRate, BPWRZFPCUDA,
::testing::Values("16", "32", "64"));

int main(int argc, char **argv)
{
#if ADIOS2_USE_MPI
MPI_Init(nullptr, nullptr);
#endif

int result;
::testing::InitGoogleTest(&argc, argv);
if (argc > 1)
{
engineName = std::string(argv[1]);
}
result = RUN_ALL_TESTS();

#if ADIOS2_USE_MPI
MPI_Finalize();
#endif

return result;
}