-
Notifications
You must be signed in to change notification settings - Fork 128
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #2777 from anagainaru/cuda_gpu_aware
BP4 engine capable of using device buffers with Put
- Loading branch information
Showing
20 changed files
with
316 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
#------------------------------------------------------------------------------# | ||
# Distributed under the OSI-approved Apache License, Version 2.0. See | ||
# accompanying file Copyright.txt for details. | ||
#------------------------------------------------------------------------------# | ||
|
||
# The example source is a .cu file, so the CUDA language has to be enabled
# before the target is declared.
enable_language(CUDA)

# Example executable that writes a GPU buffer through ADIOS2 and reads it back.
add_executable(GPUWriteRead_cuda
  cudaWriteRead.cu
)

# NOTE(review): the CUDA::cudart / CUDA::cuda_driver imported targets are
# presumably provided by a find_package(CUDAToolkit) call elsewhere in the
# project; that call is not visible from this file - confirm.
target_link_libraries(GPUWriteRead_cuda
  PUBLIC
    adios2::cxx11
    CUDA::cudart
    CUDA::cuda_driver
)

set_target_properties(GPUWriteRead_cuda
  PROPERTIES
    CUDA_SEPARABLE_COMPILATION ON
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,103 @@ | ||
/* | ||
* Simple example of writing and reading data | ||
* through the ADIOS2 BP engine, with one IO step written | ||
* for every simulation step. | ||
*/ | ||
|
||
#include <ios> | ||
#include <vector> | ||
#include <iostream> | ||
|
||
#include <adios2.h> | ||
|
||
#include <cuda.h> | ||
#include <cuda_runtime.h> | ||
|
||
// Adds `val` to one element of `vect` per thread block: every thread of a
// block updates the element whose index equals that block's blockIdx.x
// (threadIdx is not used). There is no bounds check, so the launch must not
// use more blocks than `vect` has elements.
__global__ void update_array(float *vect, int val)
{
    const unsigned int element = blockIdx.x;
    vect[element] = vect[element] + val;
}
|
||
/*
 * Writes nSteps ADIOS2 output steps of an N-element float array that lives
 * in GPU memory.
 *
 * The array starts zeroed and is incremented by 10 on the device after every
 * output step. The device pointer is handed directly to Put() after the
 * variable's memory space has been set to CUDA.
 *
 * fname   name of the output file passed to IO::Open
 * N       number of float elements in the array
 * nSteps  number of output steps to write (values <= 0 write no steps)
 *
 * Returns 0 on success and 1 if a CUDA call fails. Exceptions thrown by
 * ADIOS2 are not caught here.
 */
int BPWrite(const std::string fname, const size_t N, int nSteps)
{
    // Initialize the simulation data on the device
    const size_t nBytes = N * sizeof(float);
    float *gpuSimData = nullptr;
    cudaError_t err = cudaMalloc(&gpuSimData, nBytes);
    if (err != cudaSuccess)
    {
        std::cerr << "BPWrite: cudaMalloc failed: " << cudaGetErrorString(err)
                  << std::endl;
        return 1;
    }
    // cudaMemset takes a size in bytes, not in elements
    err = cudaMemset(gpuSimData, 0, nBytes);
    if (err != cudaSuccess)
    {
        std::cerr << "BPWrite: cudaMemset failed: " << cudaGetErrorString(err)
                  << std::endl;
        cudaFree(gpuSimData);
        return 1;
    }

    // Set up the ADIOS structures
    adios2::ADIOS adios;
    adios2::IO io = adios.DeclareIO("WriteIO");

    // Declare a 1D array of N elements; this single writer owns all of it
    const adios2::Dims shape{static_cast<size_t>(N)};
    const adios2::Dims start{static_cast<size_t>(0)};
    const adios2::Dims count{N};
    auto data = io.DefineVariable<float>("data", shape, start, count);

    adios2::Engine bpWriter = io.Open(fname, adios2::Mode::Write);

    int status = 0;
    // Simulation steps (signed counter: nSteps is an int and may be <= 0)
    for (int step = 0; step < nSteps; ++step)
    {
        // Make a 1D selection to describe the local dimensions of the
        // variable we write and its offsets in the global spaces
        adios2::Box<adios2::Dims> sel({0}, {N});
        data.SetSelection(sel);

        // Start IO step every write step
        bpWriter.BeginStep();
        data.SetMemorySpace(adios2::MemorySpace::CUDA);
        bpWriter.Put(data, gpuSimData);
        bpWriter.EndStep();

        // Update values in the simulation data: one block per element.
        // A grid of zero blocks is an invalid launch, so skip it for N == 0.
        if (N > 0)
        {
            update_array<<<N, 1>>>(gpuSimData, 10);
            err = cudaGetLastError();
            if (err != cudaSuccess)
            {
                std::cerr << "BPWrite: update_array launch failed: "
                          << cudaGetErrorString(err) << std::endl;
                status = 1;
                break;
            }
        }
    }

    bpWriter.Close();

    // Surface errors from the asynchronous kernels before releasing memory
    err = cudaDeviceSynchronize();
    if (err != cudaSuccess)
    {
        std::cerr << "BPWrite: device synchronization failed: "
                  << cudaGetErrorString(err) << std::endl;
        status = 1;
    }
    cudaFree(gpuSimData);
    return status;
}
|
||
/*
 * Reads the float variable "data" back from fname, one step at a time, into
 * a host buffer and prints one sample element per step.
 *
 * fname   name of the file passed to IO::Open
 * N       number of elements selected from every step
 * nSteps  not used; the number of steps is taken from the opened file
 *
 * Returns 0 on success and 1 if the variable "data" is not found.
 * Exceptions thrown by ADIOS2 are not caught here.
 */
int BPRead(const std::string fname, const size_t N, int nSteps)
{
    // Create ADIOS structures
    adios2::ADIOS adios;
    adios2::IO io = adios.DeclareIO("ReadIO");

    adios2::Engine bpReader = io.Open(fname, adios2::Mode::Read);

    auto data = io.InquireVariable<float>("data");
    if (!data)
    {
        std::cerr << "BPRead: variable \"data\" not found in " << fname
                  << std::endl;
        bpReader.Close();
        return 1;
    }

    // Steps() is unsigned; keep it that way to match the loop counter
    const size_t write_step = bpReader.Steps();
    std::cout << "Steps expected by the reader: " << write_step << std::endl;
    std::cout << "Expecting data per step: " << data.Shape()[0];
    std::cout << " elements" << std::endl;

    // Create the local buffer and initialize the access point in the ADIOS file
    std::vector<float> simData(N); // set size to N
    const adios2::Dims start{0};
    const adios2::Dims count{N};
    const adios2::Box<adios2::Dims> sel(start, count);
    data.SetSelection(sel);

    // Element printed as a sample: index 1 when it exists, otherwise index 0
    const size_t sample = (N > 1) ? 1 : 0;

    // Read the data in each of the ADIOS steps
    for (size_t step = 0; step < write_step; step++)
    {
        data.SetStepSelection({step, 1});
        bpReader.Get(data, simData.data());
        bpReader.PerformGets();
        std::cout << "Simulation step " << step << " : ";
        std::cout << simData.size() << " elements";
        if (!simData.empty())
        {
            std::cout << ": " << simData[sample];
        }
        std::cout << std::endl;
    }
    bpReader.Close();
    return 0;
}
|
||
/*
 * Writes a GPU-resident array to "GPUWriteRead.bp" over several steps and
 * then reads it back on the host.
 *
 * Returns the sum of the BPWrite and BPRead return codes, or 1 if no usable
 * CUDA device can be selected.
 */
int main(int argc, char **argv)
{
    const std::string fname("GPUWriteRead.bp");

    int device_count = 0;
    if (cudaGetDeviceCount(&device_count) != cudaSuccess || device_count < 1)
    {
        std::cerr << "No CUDA device available" << std::endl;
        return 1;
    }
    // Keep using device 1 when it exists; fall back to device 0 on
    // single-GPU machines, where ordinal 1 is invalid.
    const int device_id = (device_count > 1) ? 1 : 0;
    const cudaError_t err = cudaSetDevice(device_id);
    if (err != cudaSuccess)
    {
        std::cerr << "cudaSetDevice(" << device_id
                  << ") failed: " << cudaGetErrorString(err) << std::endl;
        return 1;
    }

    const size_t N = 6000;
    int nSteps = 10, ret = 0;

    ret += BPWrite(fname, N, nSteps);
    ret += BPRead(fname, N, nSteps);
    return ret;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
/* | ||
* Distributed under the OSI-approved Apache License, Version 2.0. See | ||
* accompanying file Copyright.txt for details. | ||
* | ||
* adiosCUDA.cu | ||
* | ||
* Created on: May 9, 2021 | ||
* Author: Ana Gainaru [email protected] | ||
*/ | ||
|
||
#ifndef ADIOS2_HELPER_ADIOSCUDA_CU_ | ||
#define ADIOS2_HELPER_ADIOSCUDA_CU_ | ||
|
||
#include <thrust/extrema.h> | ||
#include <thrust/device_ptr.h> | ||
#include "adios2/common/ADIOSMacros.h" | ||
|
||
#include "adiosCUDA.h" | ||
|
||
namespace
{
/*
 * Finds the smallest and largest element of a device buffer and copies both
 * values to the host.
 *
 * values  pointer to `size` elements of T in device memory
 * size    number of elements in the buffer
 * min     receives the minimum value
 * max     receives the maximum value
 *
 * When the buffer is empty (or `values` is null) min and max are left
 * untouched.
 */
template <class T>
void CUDAMinMaxImpl(const T *values, const size_t size, T &min, T &max)
{
    // For an empty range minmax_element returns the end iterator, which
    // must not be read from.
    if (values == nullptr || size == 0)
    {
        return;
    }
    thrust::device_ptr<const T> dev_ptr(values);
    auto res = thrust::minmax_element(dev_ptr, dev_ptr + size);
    // The results are iterators into device memory; copy the two values back
    cudaMemcpy(&min, thrust::raw_pointer_cast(res.first), sizeof(T),
               cudaMemcpyDeviceToHost);
    cudaMemcpy(&max, thrust::raw_pointer_cast(res.second), sizeof(T),
               cudaMemcpyDeviceToHost);
}
// Types not supported on the device: these overloads intentionally do
// nothing and leave min and max unchanged.
void CUDAMinMaxImpl(const long double *values, const size_t size,
                    long double &min, long double &max)
{
}
void CUDAMinMaxImpl(const std::complex<float> *values, const size_t size,
                    std::complex<float> &min, std::complex<float> &max)
{
}
void CUDAMinMaxImpl(const std::complex<double> *values, const size_t size,
                    std::complex<double> &min, std::complex<double> &max)
{
}
}
|
||
/*
 * Public entry point declared in adiosCUDA.h. It forwards all arguments to
 * the file-local CUDAMinMaxImpl overload set, which picks either the generic
 * implementation or one of the per-type overloads.
 */
template <class T>
void adios2::helper::CUDAMinMax(const T *values, const size_t size, T &min,
                                T &max)
{
    CUDAMinMaxImpl(values, size, min, max);
}

// Explicitly instantiate CUDAMinMax for every type enumerated by
// ADIOS2_FOREACH_PRIMITIVE_STDTYPE_1ARG.
#define instantiate_cuda_minmax(T)                                             \
    template void adios2::helper::CUDAMinMax(                                  \
        const T *values, const size_t size, T &min, T &max);
ADIOS2_FOREACH_PRIMITIVE_STDTYPE_1ARG(instantiate_cuda_minmax)
#undef instantiate_cuda_minmax
|
||
#endif /* ADIOS2_HELPER_ADIOSCUDA_CU_ */ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
/* | ||
* Distributed under the OSI-approved Apache License, Version 2.0. See | ||
* accompanying file Copyright.txt for details. | ||
* | ||
* adiosCUDA.h CUDA functions used in the ADIOS framework | ||
* | ||
* Created on: May 9, 2021 | ||
* Author: Ana Gainaru [email protected] | ||
*/ | ||
|
||
#ifndef ADIOS2_HELPER_ADIOSCUDA_H_ | ||
#define ADIOS2_HELPER_ADIOSCUDA_H_ | ||
|
||
namespace adios2
{
namespace helper
{

/*
 * Host-callable helper that computes the minimum and maximum of a GPU
 * buffer.
 *
 * values  pointer to the buffer holding `size` elements of T
 * size    number of elements in the buffer
 * min     output: the smallest value found
 * max     output: the largest value found
 */
template <typename T>
void CUDAMinMax(const T *values, const size_t size, T &min, T &max);

} // namespace helper
} // namespace adios2
|
||
#endif /* ADIOS2_HELPER_ADIOSCUDA_H_ */ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.