Skip to content

Commit

Permalink
Merge pull request #2777 from anagainaru/cuda_gpu_aware
Browse files Browse the repository at this point in the history
BP4 engine capable of using device buffers with Put
  • Loading branch information
eisenhauer authored Oct 8, 2021
2 parents 3b0b627 + 00b97e7 commit 5feba27
Show file tree
Hide file tree
Showing 20 changed files with 316 additions and 1 deletion.
6 changes: 5 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,7 @@ adios_option(SZ "Enable support for SZ transforms" AUTO)
adios_option(LIBPRESSIO "Enable support for LIBPRESSIO transforms" AUTO)
adios_option(MGARD "Enable support for MGARD transforms" AUTO)
adios_option(PNG "Enable support for PNG transforms" AUTO)
adios_option(CUDA "Enable support for Cuda" AUTO)
adios_option(MPI "Enable support for MPI" AUTO)
adios_option(DAOS "Enable support for DAOS" AUTO)
adios_option(DataMan "Enable support for DataMan" AUTO)
Expand Down Expand Up @@ -174,7 +175,7 @@ if(ADIOS2_HAVE_MPI)
endif()

set(ADIOS2_CONFIG_OPTS
Blosc BZip2 ZFP SZ MGARD PNG MPI DataMan DAOS MHS SSC SST BP5 DataSpaces ZeroMQ HDF5 HDF5_VOL IME Python Fortran SysVShMem Profiling Endian_Reverse LIBPRESSIO
Blosc BZip2 ZFP SZ MGARD PNG CUDA MPI DataMan DAOS MHS SSC SST BP5 DataSpaces ZeroMQ HDF5 HDF5_VOL IME Python Fortran SysVShMem Profiling Endian_Reverse LIBPRESSIO
)
GenerateADIOSHeaderConfig(${ADIOS2_CONFIG_OPTS})
configure_file(
Expand Down Expand Up @@ -296,6 +297,9 @@ message(" C++ Compiler : ${CMAKE_CXX_COMPILER_ID} "
"${CMAKE_CXX_COMPILER_WRAPPER}")
message(" ${CMAKE_CXX_COMPILER}")
message("")
if(ADIOS2_HAVE_CUDA)
message(" Cuda Compiler : ${CMAKE_CUDA_COMPILER} ")
endif()
if(ADIOS2_HAVE_Fortran)
message(" Fortran Compiler : ${CMAKE_Fortran_COMPILER_ID} "
"${CMAKE_Fortran_COMPILER_VERSION} "
Expand Down
6 changes: 6 additions & 0 deletions bindings/CXX11/adios2/cxx11/Variable.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,12 @@ namespace adios2
} \
\
template <> \
void Variable<T>::SetMemorySpace(const MemorySpace mem) \
{ \
m_Variable->SetMemorySpace(mem); \
} \
\
template <> \
void Variable<T>::SetShape(const Dims &shape) \
{ \
helper::CheckForNullptr(m_Variable, \
Expand Down
5 changes: 5 additions & 0 deletions bindings/CXX11/adios2/cxx11/Variable.h
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,11 @@ class Variable
/** Checks if object is valid, e.g. if( variable ) { //..valid } */
explicit operator bool() const noexcept;

/**
* Sets the memory step for all following Puts
*/
void SetMemorySpace(const MemorySpace mem);

/**
* Set new shape, care must be taken when reading back the variable for
* different steps. Only applies to Global arrays.
Expand Down
10 changes: 10 additions & 0 deletions cmake/DetectOptions.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,16 @@ endif()

set(mpi_find_components C)

# Cuda
if(ADIOS2_USE_CUDA STREQUAL AUTO)
find_package(CUDAToolkit)
elseif(ADIOS2_USE_CUDA)
find_package(CUDAToolkit REQUIRED)
endif()
if(CUDAToolkit_FOUND)
set(ADIOS2_HAVE_CUDA TRUE)
endif()

# Fortran
if(ADIOS2_USE_Fortran STREQUAL AUTO)
include(CheckLanguage)
Expand Down
4 changes: 4 additions & 0 deletions examples/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,7 @@ endif()
if(ADIOS2_BUILD_EXAMPLES_EXPERIMENTAL)
add_subdirectory(experimental)
endif()

if(ADIOS2_HAVE_CUDA)
add_subdirectory(cuda)
endif()
10 changes: 10 additions & 0 deletions examples/cuda/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#------------------------------------------------------------------------------#
# Distributed under the OSI-approved Apache License, Version 2.0. See
# accompanying file Copyright.txt for details.
#------------------------------------------------------------------------------#

enable_language(CUDA)

add_executable(GPUWriteRead_cuda cudaWriteRead.cu)
target_link_libraries(GPUWriteRead_cuda PUBLIC adios2::cxx11 CUDA::cudart CUDA::cuda_driver)
set_target_properties(GPUWriteRead_cuda PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
103 changes: 103 additions & 0 deletions examples/cuda/cudaWriteRead.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
/*
* Simple example of writing and reading data
* through ADIOS2 BP engine with multiple simulations steps
* for every IO step.
*/

#include <ios>
#include <vector>
#include <iostream>

#include <adios2.h>

#include <cuda.h>
#include <cuda_runtime.h>

__global__ void update_array(float *vect, int val) {
vect[blockIdx.x] += val;
}

int BPWrite(const std::string fname, const size_t N, int nSteps){
// Initialize the simulation data
float *gpuSimData;
cudaMalloc(&gpuSimData, N * sizeof(float));
cudaMemset(gpuSimData, 0, N);

// Set up the ADIOS structures
adios2::ADIOS adios;
adios2::IO io = adios.DeclareIO("WriteIO");

// Declare an array for the ADIOS data of size (NumOfProcesses * N)
const adios2::Dims shape{static_cast<size_t>(N)};
const adios2::Dims start{static_cast<size_t>(0)};
const adios2::Dims count{N};
auto data = io.DefineVariable<float>("data", shape, start, count);

adios2::Engine bpWriter = io.Open(fname, adios2::Mode::Write);

// Simulation steps
for (size_t step = 0; step < nSteps; ++step)
{
// Make a 1D selection to describe the local dimensions of the
// variable we write and its offsets in the global spaces
adios2::Box<adios2::Dims> sel({0}, {N});
data.SetSelection(sel);

// Start IO step every write step
bpWriter.BeginStep();
data.SetMemorySpace(adios2::MemorySpace::CUDA);
bpWriter.Put(data, gpuSimData);
bpWriter.EndStep();

// Update values in the simulation data
update_array<<<N,1>>>(gpuSimData, 10);
}

bpWriter.Close();
return 0;
}

int BPRead(const std::string fname, const size_t N, int nSteps){
// Create ADIOS structures
adios2::ADIOS adios;
adios2::IO io = adios.DeclareIO("ReadIO");

adios2::Engine bpReader = io.Open(fname, adios2::Mode::Read);

auto data = io.InquireVariable<float>("data");
std::cout << "Steps expected by the reader: " << bpReader.Steps() << std::endl;
std::cout << "Expecting data per step: " << data.Shape()[0];
std::cout << " elements" << std::endl;

int write_step = bpReader.Steps();
// Create the local buffer and initialize the access point in the ADIOS file
std::vector<float> simData(N); //set size to N
const adios2::Dims start{0};
const adios2::Dims count{N};
const adios2::Box<adios2::Dims> sel(start, count);
data.SetSelection(sel);

// Read the data in each of the ADIOS steps
for (size_t step = 0; step < write_step; step++)
{
data.SetStepSelection({step, 1});
bpReader.Get(data, simData.data());
bpReader.PerformGets();
std::cout << "Simualation step " << step << " : ";
std::cout << simData.size() << " elements: " << simData[1] << std::endl;
}
bpReader.Close();
return 0;
}

int main(int argc, char **argv){
const std::string fname("GPUWriteRead.bp");
const int device_id = 1;
cudaSetDevice(device_id);
const size_t N = 6000;
int nSteps = 10, ret = 0;

ret += BPWrite(fname, N, nSteps);
ret += BPRead(fname, N, nSteps);
return ret;
}
7 changes: 7 additions & 0 deletions source/adios2/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ add_library(adios2_core
helper/adiosXML.cpp
helper/adiosXMLUtil.cpp
helper/adiosYAML.cpp
helper/adiosCUDA.cu

#engine derived classes
engine/bp3/BP3Reader.cpp engine/bp3/BP3Reader.tcc
Expand Down Expand Up @@ -100,6 +101,12 @@ add_library(adios2_core
set_property(TARGET adios2_core PROPERTY EXPORT_NAME core)
set_property(TARGET adios2_core PROPERTY OUTPUT_NAME adios2${ADIOS2_LIBRARY_SUFFIX}_core)

if(ADIOS2_HAVE_CUDA)
enable_language(CUDA)
target_link_libraries(adios2_core PUBLIC CUDA::cudart CUDA::cuda_driver)
set_target_properties(adios2_core PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
endif()

target_include_directories(adios2_core
PUBLIC
$<BUILD_INTERFACE:${ADIOS2_SOURCE_DIR}/source>
Expand Down
8 changes: 8 additions & 0 deletions source/adios2/common/ADIOSTypes.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,14 @@
namespace adios2
{

/** Memory space for the buffers received with Put */
enum class MemorySpace
{
Detect, ///< Detect the memory space automatically
Host, ///< Host memory space (default)
CUDA ///< GPU memory spaces
};

/** Variable shape type identifier, assigned automatically from the signature of
* DefineVariable */
enum class ShapeID
Expand Down
1 change: 1 addition & 0 deletions source/adios2/core/Variable.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ namespace core
info.StepsCount = stepsCount; \
info.Data = const_cast<T *>(data); \
info.Operations = m_Operations; \
info.IsGPU = IsCUDAPointer((void *)data); \
m_BlocksInfo.push_back(info); \
return m_BlocksInfo.back(); \
} \
Expand Down
1 change: 1 addition & 0 deletions source/adios2/core/Variable.h
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,7 @@ class Variable : public VariableBase
SelectionType Selection = SelectionType::BoundingBox;
bool IsValue = false;
bool IsReverseDims = false;
bool IsGPU = false;
};

/** use for multiblock info */
Expand Down
21 changes: 21 additions & 0 deletions source/adios2/core/VariableBase.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,11 +38,32 @@ VariableBase::VariableBase(const std::string &name, const DataType type,
InitShapeType();
}

bool VariableBase::IsCUDAPointer(void *ptr)
{
if (m_MemorySpace == MemorySpace::CUDA)
return true;
if (m_MemorySpace == MemorySpace::Host)
return false;

#ifdef ADIOS2_HAVE_CUDA
cudaPointerAttributes attr;
cudaPointerGetAttributes(&attr, ptr);
return attr.type == cudaMemoryTypeDevice;
#endif

return false;
}

size_t VariableBase::TotalSize() const noexcept
{
return helper::GetTotalSize(m_Count);
}

void VariableBase::SetMemorySpace(const MemorySpace mem)
{
m_MemorySpace = mem;
}

void VariableBase::SetShape(const adios2::Dims &shape)
{
if (m_Type == helper::GetDataType<std::string>())
Expand Down
13 changes: 13 additions & 0 deletions source/adios2/core/VariableBase.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ class VariableBase
/** Variable -> sizeof(T),
* VariableCompound -> from constructor sizeof(struct) */
const size_t m_ElementSize;
MemorySpace m_MemorySpace = MemorySpace::Host;

ShapeID m_ShapeID = ShapeID::Unknown; ///< see shape types in ADIOSTypes.h
size_t m_BlockID = 0; ///< current block ID for local variables, global = 0
Expand Down Expand Up @@ -124,6 +125,18 @@ class VariableBase
*/
size_t TotalSize() const noexcept;

/**
* Check if buffer is allocated on CUDA space
* @param pointer to the user data
*/
bool IsCUDAPointer(void *ptr);

/**
* Set the memory space
* @param the memory space where the expected buffers were allocated
*/
void SetMemorySpace(const MemorySpace mem);

/**
* Set new shape
* @param shape input shape to be applied to this variable
Expand Down
46 changes: 46 additions & 0 deletions source/adios2/helper/adiosCUDA.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
/*
* Distributed under the OSI-approved Apache License, Version 2.0. See
* accompanying file Copyright.txt for details.
*
* adiosCUDA.cpp
*
* Created on: May 9, 2021
* Author: Ana Gainaru [email protected]
*/

#ifndef ADIOS2_HELPER_ADIOSCUDA_CU_
#define ADIOS2_HELPER_ADIOSCUDA_CU_

#include <thrust/extrema.h>
#include <thrust/device_ptr.h>
#include "adios2/common/ADIOSMacros.h"

#include "adiosCUDA.h"

namespace {
template <class T>
void CUDAMinMaxImpl(const T *values, const size_t size, T &min, T &max)
{
thrust::device_ptr<const T> dev_ptr(values);
auto res = thrust::minmax_element(dev_ptr, dev_ptr + size);
cudaMemcpy(&min, thrust::raw_pointer_cast(res.first), sizeof(T), cudaMemcpyDeviceToHost);
cudaMemcpy(&max, thrust::raw_pointer_cast(res.second), sizeof(T), cudaMemcpyDeviceToHost);
}
// types non supported on the device
void CUDAMinMaxImpl(const long double *values, const size_t size, long double &min, long double &max) {}
void CUDAMinMaxImpl(const std::complex<float> *values, const size_t size, std::complex<float> &min, std::complex<float> &max) {}
void CUDAMinMaxImpl(const std::complex<double> *values, const size_t size, std::complex<double> &min, std::complex<double> &max) {}
}

template <class T>
void adios2::helper::CUDAMinMax(const T *values, const size_t size, T &min, T &max)
{
CUDAMinMaxImpl(values, size, min, max);
}

#define declare_type(T) \
template void adios2::helper::CUDAMinMax(const T *values, const size_t size, T &min, T &max);
ADIOS2_FOREACH_PRIMITIVE_STDTYPE_1ARG(declare_type)
#undef declare_type

#endif /* ADIOS2_HELPER_ADIOSCUDA_CU_ */
29 changes: 29 additions & 0 deletions source/adios2/helper/adiosCUDA.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
/*
* Distributed under the OSI-approved Apache License, Version 2.0. See
* accompanying file Copyright.txt for details.
*
* adiosCUDA.h CUDA functions used in the ADIOS framework
*
* Created on: May 9, 2021
* Author: Ana Gainaru [email protected]
*/

#ifndef ADIOS2_HELPER_ADIOSCUDA_H_
#define ADIOS2_HELPER_ADIOSCUDA_H_

namespace adios2
{
namespace helper
{

/*
* CUDA kernel for computing the min and max from a
* GPU buffer
*/
template <class T>
void CUDAMinMax(const T *values, const size_t size, T &min, T &max);

} // helper
} // adios2

#endif /* ADIOS2_HELPER_ADIOSCUDA_H_ */
1 change: 1 addition & 0 deletions source/adios2/helper/adiosFunctions.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#ifndef ADIOS2_HELPER_ADIOSFUNCTIONS_H_
#define ADIOS2_HELPER_ADIOSFUNCTIONS_H_

#include "adios2/helper/adiosCUDA.h" //CUDA functions
#include "adios2/helper/adiosMath.h" //math functions (cmath, algorithm)
#include "adios2/helper/adiosMemory.h" //memcpy, std::copy, insert, resize
#include "adios2/helper/adiosNetwork.h" //network and staging functions
Expand Down
Loading

0 comments on commit 5feba27

Please sign in to comment.