diff --git a/source/adios2/operator/compress/CompressZFP.cpp b/source/adios2/operator/compress/CompressZFP.cpp index a4798c77b6..7148f5960c 100644 --- a/source/adios2/operator/compress/CompressZFP.cpp +++ b/source/adios2/operator/compress/CompressZFP.cpp @@ -262,6 +262,7 @@ zfp_stream *CompressZFP::GetZFPStream(const Dims &dimensions, DataType type, { zfp_stream *stream = zfp_stream_open(NULL); zfp_stream_set_execution(stream, ZFP_DEFAULT_EXECUTION_POLICY); + bool isSerial = ZFP_DEFAULT_EXECUTION_POLICY == zfp_exec_serial; auto itAccuracy = parameters.find("accuracy"); const bool hasAccuracy = itAccuracy != parameters.end(); @@ -291,13 +292,15 @@ zfp_stream *CompressZFP::GetZFPStream(const Dims &dimensions, DataType type, else if (backend == "serial") { policy = zfp_exec_serial; + isSerial = true; } zfp_stream_set_execution(stream, policy); } if ((hasAccuracy && hasPrecision) || (hasAccuracy && hasRate) || - (hasPrecision && hasRate)) + (hasPrecision && hasRate) || + (!hasAccuracy && !hasRate && !hasPrecision && !isSerial)) { std::ostringstream oss; oss << std::endl diff --git a/testing/adios2/engine/bp/operations/CMakeLists.txt b/testing/adios2/engine/bp/operations/CMakeLists.txt index 51a4ff419c..fe4c75173f 100644 --- a/testing/adios2/engine/bp/operations/CMakeLists.txt +++ b/testing/adios2/engine/bp/operations/CMakeLists.txt @@ -29,6 +29,18 @@ if(ADIOS2_HAVE_ZFP) "XML_CONFIG_DIR=${CMAKE_CURRENT_SOURCE_DIR}" ) endforeach() + + if(ADIOS2_HAVE_CUDA) + enable_language(CUDA) + + gtest_add_tests_helper(WriteReadZfpCuda MPI_ALLOW BP Engine.BP. .BP4 + WORKING_DIRECTORY ${BP4_DIR} EXTRA_ARGS "BP4" + ) + set_source_files_properties(CudaRoutines.cu PROPERTIES LANGUAGE CUDA) + foreach(tgt ${Test.Engine.BP.WriteReadZfpCuda-TARGETS}) + target_sources(${tgt} PRIVATE CudaRoutines.cu) + endforeach() + endif() endif() if(ADIOS2_HAVE_MGARD) diff --git a/testing/adios2/engine/bp/operations/CudaRoutines.cu b/testing/adios2/engine/bp/operations/CudaRoutines.cu new file mode 100644 index 0000000000..cda74c6c84 --- /dev/null +++ b/testing/adios2/engine/bp/operations/CudaRoutines.cu @@ -0,0 +1,11 @@ +#include "CudaRoutines.h" + +__global__ void __cuda_increment(int offset, float *vec, float val) +{ + vec[blockIdx.x + offset] += val; +} + +void cuda_increment(int M, int N, int offset, float *vec, float val) +{ + __cuda_increment<<>>(offset, vec, val); +} diff --git a/testing/adios2/engine/bp/operations/CudaRoutines.h b/testing/adios2/engine/bp/operations/CudaRoutines.h new file mode 100644 index 0000000000..743c8697d1 --- /dev/null +++ b/testing/adios2/engine/bp/operations/CudaRoutines.h @@ -0,0 +1,9 @@ +#ifndef __TESTING_ADIOS2_CUDA_ROUTINES_H__ +#define __TESTING_ADIOS2_CUDA_ROUTINES_H__ + +#include +#include + +void cuda_increment(int M, int N, int offset, float *vec, float val); + +#endif diff --git a/testing/adios2/engine/bp/operations/TestBPWriteReadZfpCuda.cpp b/testing/adios2/engine/bp/operations/TestBPWriteReadZfpCuda.cpp new file mode 100644 index 0000000000..912598443c --- /dev/null +++ b/testing/adios2/engine/bp/operations/TestBPWriteReadZfpCuda.cpp @@ -0,0 +1,174 @@ +/* + * Distributed under the OSI-approved Apache License, Version 2.0. See + * accompanying file Copyright.txt for details. + */ + +#include "CudaRoutines.h" +#include + +#include +#include +#include +#include +#include //std::iota + +std::string engineName; // comes from command line + +const float EPSILON = std::numeric_limits::epsilon(); +const float INCREMENT = 10.0f; + +void ZFPRateCUDA(const std::string rate) +{ + // Each process would write a 1x8 array and all processes would + // form a mpiSize * Nx 1D array + const std::string fname("BPWRZFP1D_" + rate + ".bp"); + + // Number of rows + const size_t Nx = 100; + + // Number of steps + const size_t NSteps = 1; + + int mpiRank = 0, mpiSize = 1; +#if ADIOS2_USE_MPI + MPI_Comm_rank(MPI_COMM_WORLD, &mpiRank); + MPI_Comm_size(MPI_COMM_WORLD, &mpiSize); +#endif + +#if ADIOS2_USE_MPI + adios2::ADIOS adios(MPI_COMM_WORLD); +#else + adios2::ADIOS adios; +#endif + + const size_t NxTotal = Nx * mpiSize; + + // Initialize the simulation data + std::vector r32s(NxTotal, .0f); + std::iota(r32s.begin(), r32s.end(), .0f); + + float *gpuSimData = nullptr; + cudaMalloc(&gpuSimData, Nx * sizeof(float)); + cudaMemcpy(gpuSimData, ((float *)&r32s[0] + (Nx * mpiRank)), + Nx * sizeof(float), cudaMemcpyHostToDevice); + + { + adios2::IO io = adios.DeclareIO("TestIO"); + + if (!engineName.empty()) + { + io.SetEngine(engineName); + } + + const adios2::Dims shape{static_cast(NxTotal)}; + const adios2::Dims start{static_cast(Nx * mpiRank)}; + const adios2::Dims count{Nx}; + + auto var_r32 = io.DefineVariable("r32", shape, start, count); + + // add operations + adios2::Operator ZFPOp = + adios.DefineOperator("ZFPCompressor", adios2::ops::LossyZFP); + + var_r32.AddOperation(ZFPOp, {{adios2::ops::zfp::key::rate, rate}}); + + adios2::Engine bpWriter = io.Open(fname, adios2::Mode::Write); + + for (size_t step = 0; step < NSteps; ++step) + { + // Update values in the simulation data + cuda_increment(Nx, 1, 0, gpuSimData, INCREMENT); + + bpWriter.BeginStep(); + var_r32.SetMemorySpace(adios2::MemorySpace::CUDA); + bpWriter.Put(var_r32, gpuSimData); + bpWriter.EndStep(); + } + + bpWriter.Close(); + } + +#if ADIOS2_USE_MPI + MPI_Barrier(MPI_COMM_WORLD); +#endif + + { + adios2::IO io = adios.DeclareIO("ReadIO"); + + if (!engineName.empty()) + { + io.SetEngine(engineName); + } + + adios2::Engine bpReader = io.Open(fname, adios2::Mode::Read); + + auto var_r32 = io.InquireVariable("r32"); + EXPECT_TRUE(var_r32); + ASSERT_EQ(var_r32.ShapeID(), adios2::ShapeID::GlobalArray); + ASSERT_EQ(var_r32.Steps(), NSteps); + ASSERT_EQ(var_r32.Shape()[0], NxTotal); + + auto mmR32 = std::minmax_element(r32s.begin(), r32s.end()); + EXPECT_EQ(var_r32.Min() - INCREMENT, *mmR32.first); + EXPECT_EQ(var_r32.Max() - INCREMENT, *mmR32.second); + + unsigned int t = 0; + for (; bpReader.BeginStep() == adios2::StepStatus::OK; ++t) + { + std::vector r32o(NxTotal); + bpReader.Get(var_r32, r32o); + bpReader.EndStep(); + + // Remove INCREMENT from each element + std::transform(r32o.begin(), r32o.end(), r32o.begin(), + std::bind(std::minus(), std::placeholders::_1, + INCREMENT)); + + for (int i = 0; i < NxTotal; i++) + { + char msg[1 << 8] = {0}; + sprintf(msg, "t=%d i=%d rank=%d r32o=%f r32s=%f", t, i, mpiRank, + r32o[i], r32s[i]); + ASSERT_LT(std::abs(r32o[i] - r32s[i]), EPSILON) << msg; + } + } + EXPECT_EQ(t, NSteps); + + bpReader.Close(); + } +} + +class BPWRZFPCUDA : public ::testing::TestWithParam +{ +public: + BPWRZFPCUDA() = default; + + virtual void SetUp() {} + virtual void TearDown() {} +}; + +TEST_P(BPWRZFPCUDA, ADIOS2BPWRZFPCUDA) { ZFPRateCUDA(GetParam()); } + +INSTANTIATE_TEST_SUITE_P(ZFPRate, BPWRZFPCUDA, + ::testing::Values("16", "32", "64")); + +int main(int argc, char **argv) +{ +#if ADIOS2_USE_MPI + MPI_Init(nullptr, nullptr); +#endif + + int result; + ::testing::InitGoogleTest(&argc, argv); + if (argc > 1) + { + engineName = std::string(argv[1]); + } + result = RUN_ALL_TESTS(); + +#if ADIOS2_USE_MPI + MPI_Finalize(); +#endif + + return result; +}