Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New Heterogeneous Memory Pool #37952

Open
wants to merge 39 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
39b1a12
import from vin repo
VinInn May 4, 2022
26a6700
clean up
VinInn May 4, 2022
8e8a1ab
test templating
VinInn May 4, 2022
f1e6ec9
change name
VinInn May 5, 2022
d752dc8
compiles
VinInn May 5, 2022
8ddc45e
like runs....
VinInn May 5, 2022
5302169
check lifetimes
VinInn May 5, 2022
9ce6d66
fixed the pointer move
VinInn May 5, 2022
ead220d
factorize and protect
VinInn May 6, 2022
f13550c
test CPU as well
VinInn May 6, 2022
eea72dc
move inline
VinInn May 6, 2022
f19e812
move inline
VinInn May 6, 2022
6f140eb
code format
VinInn May 6, 2022
67500af
migrate more to new pool
VinInn May 7, 2022
12d2efc
more port
VinInn May 7, 2022
031e683
more port
VinInn May 7, 2022
d32b122
BUG fixed
VinInn May 8, 2022
d7aa3b3
remove inline
VinInn May 8, 2022
b42eeae
code format
VinInn May 8, 2022
fc05336
make Buffer a class
VinInn May 9, 2022
c7256ea
migrate to Buffer
VinInn May 9, 2022
5513942
migrate Track
VinInn May 9, 2022
04eaca2
migrate Vertex
VinInn May 9, 2022
99af3d8
do some cleaning
VinInn May 10, 2022
1311dc4
schedule according to pool backend
VinInn May 10, 2022
903215f
avoid false sharing??
VinInn May 10, 2022
b299bc7
make code checks happy
VinInn May 10, 2022
0920241
make code checks happy
VinInn May 10, 2022
22d6c5b
make code checks happy
VinInn May 10, 2022
c697f59
remove final dump
VinInn May 11, 2022
c378b54
move impl in src. remove expensive debug
VinInn May 14, 2022
38b2be0
remove pessimization
VinInn May 14, 2022
6ccd526
fix unused after alloc fails
VinInn May 14, 2022
8ad2439
drive from service
VinInn May 15, 2022
d0d0f28
use same deleter
VinInn May 15, 2022
686cdbc
init memory pool in test
VinInn May 16, 2022
2d03928
address comments
VinInn May 22, 2022
acf5efb
fix size of copy
VinInn May 26, 2022
a97d64e
fix size of copy
VinInn May 26, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 9 additions & 5 deletions CUDADataFormats/BeamSpot/interface/BeamSpotCUDA.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,19 @@
#include <cuda_runtime.h>

#include "DataFormats/BeamSpot/interface/BeamSpotPOD.h"
#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h"
#include "HeterogeneousCore/CUDAUtilities/interface/cudaMemoryPool.h"

class BeamSpotCUDA {
public:
using Buffer = memoryPool::Buffer<BeamSpotPOD>;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi @VinInn isnt this technically a namespace? According to rule 2.7 those should start with a lowercase letter

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, it's a type alias: https://en.cppreference.com/w/cpp/language/type_alias .
So I guess it's OK for it to follow the same rule as for classes.


// default constructor, required by cms::cuda::Product<BeamSpotCUDA>
BeamSpotCUDA() = default;

// constructor that allocates cached device memory on the given CUDA stream
BeamSpotCUDA(cudaStream_t stream) { data_d_ = cms::cuda::make_device_unique<BeamSpotPOD>(stream); }
BeamSpotCUDA(cudaStream_t stream) {
data_d_ = memoryPool::cuda::makeBuffer<BeamSpotPOD>(1, stream, memoryPool::onDevice);
}

// movable, non-copiable
BeamSpotCUDA(BeamSpotCUDA const&) = delete;
Expand All @@ -23,11 +27,11 @@ class BeamSpotCUDA {
BeamSpotPOD* data() { return data_d_.get(); }
BeamSpotPOD const* data() const { return data_d_.get(); }

cms::cuda::device::unique_ptr<BeamSpotPOD>& ptr() { return data_d_; }
cms::cuda::device::unique_ptr<BeamSpotPOD> const& ptr() const { return data_d_; }
Buffer& ptr() { return data_d_; }
Buffer const& ptr() const { return data_d_; }

private:
cms::cuda::device::unique_ptr<BeamSpotPOD> data_d_;
Buffer data_d_;
};

#endif // CUDADataFormats_BeamSpot_interface_BeamSpotCUDA_h
189 changes: 0 additions & 189 deletions CUDADataFormats/Common/interface/HeterogeneousSoA.h

This file was deleted.

8 changes: 3 additions & 5 deletions CUDADataFormats/Common/interface/HostProduct.h
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#ifndef CUDADataFormatsCommonHostProduct_H
#define CUDADataFormatsCommonHostProduct_H

#include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h"
#include <memory>

// a heterogeneous unique pointer...
template <typename T>
Expand All @@ -12,18 +12,16 @@ class HostProduct {
HostProduct(HostProduct&&) = default;
HostProduct& operator=(HostProduct&&) = default;

explicit HostProduct(cms::cuda::host::unique_ptr<T>&& p) : hm_ptr(std::move(p)) {}
explicit HostProduct(std::unique_ptr<T>&& p) : std_ptr(std::move(p)) {}

auto const* get() const { return hm_ptr ? hm_ptr.get() : std_ptr.get(); }
auto const* get() const { return std_ptr.get(); }

auto const& operator*() const { return *get(); }

auto const* operator->() const { return get(); }

private:
cms::cuda::host::unique_ptr<T> hm_ptr; //!
std::unique_ptr<T> std_ptr; //!
std::unique_ptr<T> std_ptr; //!
};

#endif
13 changes: 6 additions & 7 deletions CUDADataFormats/SiPixelDigi/interface/SiPixelDigiErrorsCUDA.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,15 @@

#include "DataFormats/SiPixelRawData/interface/SiPixelErrorCompact.h"
#include "DataFormats/SiPixelRawData/interface/SiPixelFormatterErrors.h"
#include "HeterogeneousCore/CUDAUtilities/interface/memoryPool.h"
#include "HeterogeneousCore/CUDAUtilities/interface/SimpleVector.h"
#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h"
#include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h"

class SiPixelDigiErrorsCUDA {
public:
using SiPixelErrorCompactVector = cms::cuda::SimpleVector<SiPixelErrorCompact>;

SiPixelDigiErrorsCUDA() = default;
explicit SiPixelDigiErrorsCUDA(size_t maxFedWords, SiPixelFormatterErrors errors, cudaStream_t stream);
SiPixelDigiErrorsCUDA(size_t maxFedWords, SiPixelFormatterErrors errors, cudaStream_t stream);
~SiPixelDigiErrorsCUDA() = default;

SiPixelDigiErrorsCUDA(const SiPixelDigiErrorsCUDA&) = delete;
Expand All @@ -27,16 +26,16 @@ class SiPixelDigiErrorsCUDA {
SiPixelErrorCompactVector* error() { return error_d.get(); }
SiPixelErrorCompactVector const* error() const { return error_d.get(); }

using HostDataError = std::pair<SiPixelErrorCompactVector, cms::cuda::host::unique_ptr<SiPixelErrorCompact[]>>;
using HostDataError = std::pair<SiPixelErrorCompactVector, memoryPool::Buffer<SiPixelErrorCompact>>;
HostDataError dataErrorToHostAsync(cudaStream_t stream) const;

void copyErrorToHostAsync(cudaStream_t stream);
int nErrorWords() const { return nErrorWords_; }

private:
cms::cuda::device::unique_ptr<SiPixelErrorCompact[]> data_d;
cms::cuda::device::unique_ptr<SiPixelErrorCompactVector> error_d;
cms::cuda::host::unique_ptr<SiPixelErrorCompactVector> error_h;
memoryPool::Buffer<SiPixelErrorCompact> data_d;
memoryPool::Buffer<SiPixelErrorCompactVector> error_d;
memoryPool::Buffer<SiPixelErrorCompactVector> error_h;
SiPixelFormatterErrors formatterErrors_h;
int nErrorWords_ = 0;
};
Expand Down
12 changes: 5 additions & 7 deletions CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h
Original file line number Diff line number Diff line change
@@ -1,18 +1,16 @@
#ifndef CUDADataFormats_SiPixelDigi_interface_SiPixelDigisCUDA_h
#define CUDADataFormats_SiPixelDigi_interface_SiPixelDigisCUDA_h

#include <cuda_runtime.h>
#include <cuda.h>

#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h"
#include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h"
#include "HeterogeneousCore/CUDAUtilities/interface/cudaCompat.h"
#include "HeterogeneousCore/CUDAUtilities/interface/memoryPool.h"
#include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDASOAView.h"

class SiPixelDigisCUDA {
public:
using StoreType = uint16_t;
SiPixelDigisCUDA() = default;
explicit SiPixelDigisCUDA(size_t maxFedWords, cudaStream_t stream);
SiPixelDigisCUDA(size_t maxFedWords, cudaStream_t stream);
~SiPixelDigisCUDA() = default;

SiPixelDigisCUDA(const SiPixelDigisCUDA &) = delete;
Expand All @@ -28,14 +26,14 @@ class SiPixelDigisCUDA {
uint32_t nModules() const { return nModules_h; }
uint32_t nDigis() const { return nDigis_h; }

cms::cuda::host::unique_ptr<StoreType[]> copyAllToHostAsync(cudaStream_t stream) const;
memoryPool::Buffer<StoreType> copyAllToHostAsync(cudaStream_t stream) const;

SiPixelDigisCUDASOAView view() { return m_view; }
SiPixelDigisCUDASOAView const view() const { return m_view; }

private:
// These are consumed by downstream device code
cms::cuda::device::unique_ptr<StoreType[]> m_store;
memoryPool::Buffer<StoreType> m_store;

SiPixelDigisCUDASOAView m_view;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@

#include <cuda_runtime.h>

#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h"
#include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h"
#include "HeterogeneousCore/CUDAUtilities/interface/cudaCompat.h"

#include <cstdint>
Expand Down Expand Up @@ -96,11 +94,11 @@ class SiPixelDigisCUDASOAView {
uint32_t* rawIdArr_;

template <typename ReturnType, typename StoreType, typename LocationType>
ReturnType* getColumnAddress(LocationType column, StoreType& store, int size) {
static ReturnType* getColumnAddress(LocationType column, StoreType& store, int size) {
return reinterpret_cast<ReturnType*>(store.get() + static_cast<int>(column) * roundFor128ByteAlignment(size));
}

static int roundFor128ByteAlignment(int size) {
static constexpr int roundFor128ByteAlignment(int size) {
constexpr int mul = 128 / sizeof(uint16_t);
return ((size + mul - 1) / mul) * mul;
};
Expand Down
Loading