-
Notifications
You must be signed in to change notification settings - Fork 4.4k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Dan Riley
authored and
Dan Riley
committed
Jun 8, 2021
1 parent
2203a47
commit 39d686e
Showing
28 changed files
with
2,125 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
<use name="DataFormats/Common"/> | ||
<use name="CUDADataFormats/Common"/> | ||
<use name="HeterogeneousCore/CUDAUtilities"/> | ||
<use name="cuda"/> | ||
<use name="rootcore"/> | ||
|
||
<export> | ||
<lib name="1"/> | ||
</export> | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
#ifndef CUDADataFormats_SiStripCluster_interface_GPUtypes_h | ||
#define CUDADataFormats_SiStripCluster_interface_GPUtypes_h | ||
|
||
#include <cstdint> | ||
#include <limits> | ||
|
||
namespace stripgpu { | ||
using detId_t = std::uint32_t; | ||
using fedId_t = std::uint16_t; | ||
using fedCh_t = std::uint8_t; | ||
using APVPair_t = std::uint16_t; | ||
using stripId_t = std::uint16_t; | ||
|
||
static constexpr detId_t invDet = std::numeric_limits<detId_t>::max(); | ||
static constexpr fedId_t invFed = std::numeric_limits<fedId_t>::max(); | ||
static constexpr stripId_t invStrip = std::numeric_limits<stripId_t>::max(); | ||
} // namespace stripgpu | ||
|
||
#endif |
144 changes: 144 additions & 0 deletions
144
CUDADataFormats/SiStripCluster/interface/MkFitSiStripClustersCUDA.h
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,144 @@ | ||
#ifndef CUDADataFormats_SiStripCluster_interface_MkFitSiStripClustersCUDA_h | ||
#define CUDADataFormats_SiStripCluster_interface_MkFitSiStripClustersCUDA_h | ||
|
||
#include "CUDADataFormats/SiStripCluster/interface/GPUtypes.h" | ||
|
||
#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h" | ||
#include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h" | ||
#include "HeterogeneousCore/CUDAUtilities/interface/cudaCompat.h" | ||
|
||
#include <cuda_runtime.h> | ||
|
||
class MkFitSiStripClustersCUDA { | ||
public: | ||
MkFitSiStripClustersCUDA() = default; | ||
explicit MkFitSiStripClustersCUDA(size_t maxClusters, int clustersPerStrip, cudaStream_t stream); | ||
~MkFitSiStripClustersCUDA() = default; | ||
|
||
MkFitSiStripClustersCUDA(const MkFitSiStripClustersCUDA &) = delete; | ||
MkFitSiStripClustersCUDA &operator=(const MkFitSiStripClustersCUDA &) = delete; | ||
MkFitSiStripClustersCUDA(MkFitSiStripClustersCUDA &&) = default; | ||
MkFitSiStripClustersCUDA &operator=(MkFitSiStripClustersCUDA &&) = default; | ||
|
||
void setNClusters(uint32_t nClusters) { nClusters_h = nClusters; } | ||
|
||
uint32_t nClusters() const { return nClusters_h; } | ||
|
||
class GlobalDeviceView { | ||
public: | ||
// __device__ __forceinline__ float local_xx(int i) const { return __ldg(local_xx_ + i); } | ||
// __device__ __forceinline__ float local_xy(int i) const { return __ldg(local_xy_ + i); } | ||
// __device__ __forceinline__ float local_yy(int i) const { return __ldg(local_yy_ + i); } | ||
// __device__ __forceinline__ float local(int i) const { return __ldg(local_ + i); } | ||
__device__ __forceinline__ float global_x(int i) const { return __ldg(global_x_ + i); } | ||
__device__ __forceinline__ float global_y(int i) const { return __ldg(global_y_ + i); } | ||
__device__ __forceinline__ float global_z(int i) const { return __ldg(global_z_ + i); } | ||
|
||
__device__ __forceinline__ float global_xx(int i) const { return __ldg(global_xx_ + i); } | ||
__device__ __forceinline__ float global_xy(int i) const { return __ldg(global_xy_ + i); } | ||
__device__ __forceinline__ float global_xz(int i) const { return __ldg(global_xz_ + i); } | ||
__device__ __forceinline__ float global_yy(int i) const { return __ldg(global_yy_ + i); } | ||
__device__ __forceinline__ float global_yz(int i) const { return __ldg(global_yz_ + i); } | ||
__device__ __forceinline__ float global_zz(int i) const { return __ldg(global_zz_ + i); } | ||
|
||
__device__ __forceinline__ short layer(int i) const { return __ldg(layer_ + i); } | ||
__device__ __forceinline__ float charge(int i) const { return __ldg(charge_ + i); } | ||
// __device__ __forceinline__ stripgpu::detId_t clusterDetId(int i) const { return __ldg(clusterDetId_ + i); } | ||
// __device__ __forceinline__ uint32_t clusterIndex(int i) const { return __ldg(clusterIndex_ + i); } | ||
// __device__ __forceinline__ uint8_t clusterADCs(int i) const { return __ldg(clusterADCs_ + i); } | ||
__device__ __forceinline__ stripgpu::stripId_t firstStrip(int i) const { return __ldg(firstStrip_ + i); } | ||
__device__ __forceinline__ uint32_t clusterSize(int i) const { return __ldg(clusterSize_ + i); } | ||
|
||
friend MkFitSiStripClustersCUDA; | ||
|
||
// private: | ||
int nClusters_; | ||
|
||
// float *local_xx_; | ||
// float *local_xy_; | ||
// float *local_yy_; | ||
// float *local_; | ||
float *global_x_; | ||
float *global_y_; | ||
float *global_z_; | ||
|
||
float *global_xx_; | ||
float *global_xy_; | ||
float *global_xz_; | ||
float *global_yy_; | ||
float *global_yz_; | ||
float *global_zz_; | ||
|
||
short *layer_; | ||
float *charge_; | ||
stripgpu::detId_t *clusterDetId_; | ||
// uint32_t *clusterIndex_; | ||
//uint8_t *clusterADCs_; | ||
stripgpu::stripId_t *firstStrip_; | ||
uint32_t *clusterSize_; | ||
}; | ||
|
||
GlobalDeviceView *gview() const { return gview_d.get(); } | ||
|
||
class HostView { | ||
public: | ||
explicit HostView(size_t maxClusters, int clustersPerStrip, cudaStream_t stream); | ||
|
||
cms::cuda::host::unique_ptr<stripgpu::detId_t[]> clusterDetId_h; | ||
// cms::cuda::host::unique_ptr<uint32_t[]> clusterIndex_h; | ||
cms::cuda::host::unique_ptr<float[]> charge_h; | ||
//cms::cuda::host::unique_ptr<uint8_t[]> clusterADCs_h; | ||
cms::cuda::host::unique_ptr<uint32_t[]> clusterSize_h; | ||
cms::cuda::host::unique_ptr<stripgpu::stripId_t[]> firstStrip_h; | ||
|
||
// cms::cuda::host::unique_ptr<float[]> local_xx_h; | ||
// cms::cuda::host::unique_ptr<float[]> local_xy_h; | ||
// cms::cuda::host::unique_ptr<float[]> local_yy_h; | ||
// cms::cuda::host::unique_ptr<float[]> local_h; | ||
cms::cuda::host::unique_ptr<float[]> global_x_h; | ||
cms::cuda::host::unique_ptr<float[]> global_y_h; | ||
cms::cuda::host::unique_ptr<float[]> global_z_h; | ||
cms::cuda::host::unique_ptr<float[]> global_xx_h; | ||
cms::cuda::host::unique_ptr<float[]> global_xy_h; | ||
cms::cuda::host::unique_ptr<float[]> global_xz_h; | ||
cms::cuda::host::unique_ptr<float[]> global_yy_h; | ||
cms::cuda::host::unique_ptr<float[]> global_yz_h; | ||
cms::cuda::host::unique_ptr<float[]> global_zz_h; | ||
|
||
cms::cuda::host::unique_ptr<short[]> layer_h; | ||
int nClusters_h; | ||
}; | ||
|
||
std::unique_ptr<HostView> hostView(int clustersPerStrip, cudaStream_t stream) const; | ||
|
||
private: | ||
cms::cuda::device::unique_ptr<stripgpu::detId_t[]> clusterDetId_d; | ||
// cms::cuda::device::unique_ptr<uint32_t[]> clusterIndex_d; | ||
cms::cuda::device::unique_ptr<float[]> charge_d; | ||
//cms::cuda::device::unique_ptr<uint8_t[]> clusterADCs_d; | ||
cms::cuda::device::unique_ptr<uint32_t[]> clusterSize_d; | ||
cms::cuda::device::unique_ptr<stripgpu::stripId_t[]> firstStrip_d; | ||
|
||
// cms::cuda::device::unique_ptr<float[]> local_xx_d; | ||
// cms::cuda::device::unique_ptr<float[]> local_xy_d; | ||
// cms::cuda::device::unique_ptr<float[]> local_yy_d; | ||
// cms::cuda::device::unique_ptr<float[]> local_d; | ||
cms::cuda::device::unique_ptr<float[]> global_x_d; | ||
cms::cuda::device::unique_ptr<float[]> global_y_d; | ||
cms::cuda::device::unique_ptr<float[]> global_z_d; | ||
cms::cuda::device::unique_ptr<float[]> global_xx_d; | ||
cms::cuda::device::unique_ptr<float[]> global_xy_d; | ||
cms::cuda::device::unique_ptr<float[]> global_xz_d; | ||
cms::cuda::device::unique_ptr<float[]> global_yy_d; | ||
cms::cuda::device::unique_ptr<float[]> global_yz_d; | ||
cms::cuda::device::unique_ptr<float[]> global_zz_d; | ||
|
||
cms::cuda::device::unique_ptr<short[]> layer_d; | ||
|
||
cms::cuda::device::unique_ptr<GlobalDeviceView> gview_d; // "me" pointer | ||
|
||
public: | ||
int nClusters_h; | ||
}; | ||
|
||
#endif |
89 changes: 89 additions & 0 deletions
89
CUDADataFormats/SiStripCluster/interface/SiStripClustersCUDA.h
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,89 @@ | ||
#ifndef CUDADataFormats_SiStripCluster_interface_SiStripClustersCUDA_h | ||
#define CUDADataFormats_SiStripCluster_interface_SiStripClustersCUDA_h | ||
|
||
#include "CUDADataFormats/SiStripCluster/interface/GPUtypes.h" | ||
|
||
#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h" | ||
#include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h" | ||
#include "HeterogeneousCore/CUDAUtilities/interface/cudaCompat.h" | ||
|
||
#include <cuda_runtime.h> | ||
|
||
class SiStripClustersCUDA { | ||
public: | ||
static constexpr uint32_t kClusterMaxStrips = 16; | ||
|
||
SiStripClustersCUDA() = default; | ||
explicit SiStripClustersCUDA(size_t maxClusters, int clustersPerStrip, cudaStream_t stream); | ||
~SiStripClustersCUDA() = default; | ||
|
||
SiStripClustersCUDA(const SiStripClustersCUDA &) = delete; | ||
SiStripClustersCUDA &operator=(const SiStripClustersCUDA &) = delete; | ||
SiStripClustersCUDA(SiStripClustersCUDA &&) = default; | ||
SiStripClustersCUDA &operator=(SiStripClustersCUDA &&) = default; | ||
|
||
void setNClusters(uint32_t nClusters) { nClusters_h = nClusters; } | ||
|
||
uint32_t nClusters() const { return nClusters_h; } | ||
|
||
class DeviceView { | ||
public: | ||
__device__ __forceinline__ uint32_t clusterIndex(int i) const { return __ldg(clusterIndex_ + i); } | ||
__device__ __forceinline__ uint32_t clusterSize(int i) const { return __ldg(clusterSize_ + i); } | ||
__device__ __forceinline__ uint8_t clusterADCs(int i) const { return __ldg(clusterADCs_ + i); } | ||
__device__ __forceinline__ stripgpu::detId_t clusterDetId(int i) const { return __ldg(clusterDetId_ + i); } | ||
__device__ __forceinline__ stripgpu::stripId_t firstStrip(int i) const { return __ldg(firstStrip_ + i); } | ||
__device__ __forceinline__ bool trueCluster(int i) const { return trueCluster_[i]; } | ||
__device__ __forceinline__ float barycenter(int i) const { return __ldg(barycenter_ + i); } | ||
__device__ __forceinline__ float charge(int i) const { return __ldg(charge_ + i); } | ||
|
||
friend SiStripClustersCUDA; | ||
|
||
// private: | ||
uint32_t *clusterIndex_; | ||
uint32_t *clusterSize_; | ||
uint8_t *clusterADCs_; | ||
stripgpu::detId_t *clusterDetId_; | ||
stripgpu::stripId_t *firstStrip_; | ||
bool *trueCluster_; | ||
float *barycenter_; | ||
float *charge_; | ||
int nClusters_; | ||
}; | ||
|
||
DeviceView *view() const { return view_d.get(); } | ||
|
||
class HostView { | ||
public: | ||
explicit HostView(size_t maxClusters, int clustersPerStrip, cudaStream_t stream); | ||
|
||
cms::cuda::host::unique_ptr<uint32_t[]> clusterIndex_h; | ||
cms::cuda::host::unique_ptr<uint32_t[]> clusterSize_h; | ||
cms::cuda::host::unique_ptr<uint8_t[]> clusterADCs_h; | ||
cms::cuda::host::unique_ptr<stripgpu::detId_t[]> clusterDetId_h; | ||
cms::cuda::host::unique_ptr<stripgpu::stripId_t[]> firstStrip_h; | ||
cms::cuda::host::unique_ptr<bool[]> trueCluster_h; | ||
cms::cuda::host::unique_ptr<float[]> barycenter_h; | ||
cms::cuda::host::unique_ptr<float[]> charge_h; | ||
int nClusters_h; | ||
}; | ||
|
||
std::unique_ptr<HostView> hostView(int clustersPerStrip, cudaStream_t stream) const; | ||
|
||
private: | ||
cms::cuda::device::unique_ptr<uint32_t[]> clusterIndex_d; | ||
cms::cuda::device::unique_ptr<uint32_t[]> clusterSize_d; | ||
cms::cuda::device::unique_ptr<uint8_t[]> clusterADCs_d; | ||
cms::cuda::device::unique_ptr<stripgpu::detId_t[]> clusterDetId_d; | ||
cms::cuda::device::unique_ptr<stripgpu::stripId_t[]> firstStrip_d; | ||
cms::cuda::device::unique_ptr<bool[]> trueCluster_d; | ||
cms::cuda::device::unique_ptr<float[]> barycenter_d; | ||
cms::cuda::device::unique_ptr<float[]> charge_d; | ||
|
||
cms::cuda::device::unique_ptr<DeviceView> view_d; // "me" pointer | ||
|
||
public: | ||
int nClusters_h; | ||
}; | ||
|
||
#endif |
110 changes: 110 additions & 0 deletions
110
CUDADataFormats/SiStripCluster/src/MkFitSiStripClustersCUDA.cc
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,110 @@ | ||
#include "CUDADataFormats/SiStripCluster/interface/MkFitSiStripClustersCUDA.h" | ||
#include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h" | ||
|
||
MkFitSiStripClustersCUDA::MkFitSiStripClustersCUDA(size_t maxClusters, int clustersPerStrip, cudaStream_t stream) { | ||
clusterDetId_d = cms::cuda::make_device_unique<stripgpu::detId_t[]>(maxClusters, stream); | ||
// clusterIndex_d = cms::cuda::make_device_unique<uint32_t[]>(maxClusters, stream); | ||
clusterSize_d = cms::cuda::make_device_unique<uint32_t[]>(maxClusters, stream); | ||
charge_d = cms::cuda::make_device_unique<float[]>(maxClusters, stream); | ||
// clusterADCs_d = cms::cuda::make_device_unique<uint8_t[]>(maxClusters * clustersPerStrip, stream); | ||
firstStrip_d = cms::cuda::make_device_unique<stripgpu::stripId_t[]>(maxClusters, stream); | ||
|
||
// local_xx_d = cms::cuda::make_device_unique<float[]>(maxClusters, stream); | ||
// local_xy_d = cms::cuda::make_device_unique<float[]>(maxClusters, stream); | ||
// local_yy_d = cms::cuda::make_device_unique<float[]>(maxClusters, stream); | ||
// local_d = cms::cuda::make_device_unique<float[]>(maxClusters, stream); | ||
global_x_d = cms::cuda::make_device_unique<float[]>(maxClusters, stream); | ||
global_y_d = cms::cuda::make_device_unique<float[]>(maxClusters, stream); | ||
global_z_d = cms::cuda::make_device_unique<float[]>(maxClusters, stream); | ||
global_xx_d = cms::cuda::make_device_unique<float[]>(maxClusters, stream); | ||
global_xy_d = cms::cuda::make_device_unique<float[]>(maxClusters, stream); | ||
global_xz_d = cms::cuda::make_device_unique<float[]>(maxClusters, stream); | ||
global_yy_d = cms::cuda::make_device_unique<float[]>(maxClusters, stream); | ||
global_yz_d = cms::cuda::make_device_unique<float[]>(maxClusters, stream); | ||
global_zz_d = cms::cuda::make_device_unique<float[]>(maxClusters, stream); | ||
|
||
layer_d = cms::cuda::make_device_unique<short[]>(maxClusters, stream); | ||
|
||
auto gview = cms::cuda::make_host_unique<GlobalDeviceView>(stream); | ||
// gview->local_xx_ = local_xx_d.get(); | ||
// gview->local_xy_ = local_xy_d.get(); | ||
// gview->local_yy_ = local_yy_d.get(); | ||
// gview->local_ = local_d.get(); | ||
gview->global_x_ = global_x_d.get(); | ||
gview->global_y_ = global_y_d.get(); | ||
gview->global_z_ = global_z_d.get(); | ||
gview->global_xx_ = global_xx_d.get(); | ||
gview->global_xy_ = global_xy_d.get(); | ||
gview->global_xz_ = global_xz_d.get(); | ||
gview->global_yy_ = global_yy_d.get(); | ||
gview->global_yz_ = global_yz_d.get(); | ||
gview->global_zz_ = global_zz_d.get(); | ||
gview->charge_ = charge_d.get(); | ||
gview->clusterDetId_ = clusterDetId_d.get(); | ||
// gview->clusterIndex_ = clusterIndex_d.get(); | ||
//gview->clusterADCs_ = clusterADCs_d.get(); | ||
gview->firstStrip_ = firstStrip_d.get(); | ||
gview->clusterSize_ = clusterSize_d.get(); | ||
|
||
gview->layer_ = layer_d.get(); | ||
|
||
gview_d = cms::cuda::make_device_unique<GlobalDeviceView>(stream); | ||
cms::cuda::copyAsync(gview_d, gview, stream); | ||
} | ||
|
||
MkFitSiStripClustersCUDA::HostView::HostView(size_t maxClusters, int clustersPerStrip, cudaStream_t stream) { | ||
clusterDetId_h = cms::cuda::make_host_unique<stripgpu::detId_t[]>(maxClusters, stream); | ||
// clusterIndex_h = cms::cuda::make_host_unique<uint32_t[]>(maxClusters, stream); | ||
charge_h = cms::cuda::make_host_unique<float[]>(maxClusters, stream); | ||
// clusterADCs_h = cms::cuda::make_host_unique<uint8_t[]>(maxClusters * clustersPerStrip, stream); | ||
firstStrip_h = cms::cuda::make_host_unique<stripgpu::stripId_t[]>(maxClusters, stream); | ||
clusterSize_h = cms::cuda::make_host_unique<uint32_t[]>(maxClusters, stream); | ||
|
||
// local_xx_h = cms::cuda::make_host_unique<float[]>(maxClusters, stream); | ||
// local_xy_h = cms::cuda::make_host_unique<float[]>(maxClusters, stream); | ||
// local_yy_h = cms::cuda::make_host_unique<float[]>(maxClusters, stream); | ||
// local_h = cms::cuda::make_host_unique<float[]>(maxClusters, stream); | ||
global_x_h = cms::cuda::make_host_unique<float[]>(maxClusters, stream); | ||
global_y_h = cms::cuda::make_host_unique<float[]>(maxClusters, stream); | ||
global_z_h = cms::cuda::make_host_unique<float[]>(maxClusters, stream); | ||
global_xx_h = cms::cuda::make_host_unique<float[]>(maxClusters, stream); | ||
global_xy_h = cms::cuda::make_host_unique<float[]>(maxClusters, stream); | ||
global_xz_h = cms::cuda::make_host_unique<float[]>(maxClusters, stream); | ||
global_yy_h = cms::cuda::make_host_unique<float[]>(maxClusters, stream); | ||
global_yz_h = cms::cuda::make_host_unique<float[]>(maxClusters, stream); | ||
global_zz_h = cms::cuda::make_host_unique<float[]>(maxClusters, stream); | ||
|
||
layer_h = cms::cuda::make_host_unique<short[]>(maxClusters, stream); | ||
|
||
nClusters_h = maxClusters; | ||
} | ||
|
||
std::unique_ptr<MkFitSiStripClustersCUDA::HostView> MkFitSiStripClustersCUDA::hostView(int clustersPerStrip, | ||
cudaStream_t stream) const { | ||
auto view_h = std::make_unique<HostView>(nClusters_h, clustersPerStrip, stream); | ||
|
||
cms::cuda::copyAsync(view_h->clusterDetId_h, clusterDetId_d, nClusters_h, stream); | ||
// cms::cuda::copyAsync(view_h->clusterIndex_h, clusterIndex_d, nClusters_h, stream); | ||
cms::cuda::copyAsync(view_h->charge_h, charge_d, nClusters_h, stream); | ||
// cms::cuda::copyAsync(view_h->clusterADCs_h, clusterADCs_d, nClusters_h * clustersPerStrip, stream); | ||
cms::cuda::copyAsync(view_h->firstStrip_h, firstStrip_d, nClusters_h, stream); | ||
cms::cuda::copyAsync(view_h->clusterSize_h, clusterSize_d, nClusters_h, stream); | ||
|
||
// cms::cuda::copyAsync(view_h->local_xx_h, local_xx_d, nClusters_h, stream); | ||
// cms::cuda::copyAsync(view_h->local_xy_h, local_xy_d, nClusters_h, stream); | ||
// cms::cuda::copyAsync(view_h->local_yy_h, local_yy_d, nClusters_h, stream); | ||
// cms::cuda::copyAsync(view_h->local_h, local_d, nClusters_h, stream); | ||
cms::cuda::copyAsync(view_h->global_x_h, global_x_d, nClusters_h, stream); | ||
cms::cuda::copyAsync(view_h->global_y_h, global_y_d, nClusters_h, stream); | ||
cms::cuda::copyAsync(view_h->global_z_h, global_z_d, nClusters_h, stream); | ||
cms::cuda::copyAsync(view_h->global_xx_h, global_xx_d, nClusters_h, stream); | ||
cms::cuda::copyAsync(view_h->global_xy_h, global_xy_d, nClusters_h, stream); | ||
cms::cuda::copyAsync(view_h->global_xz_h, global_xz_d, nClusters_h, stream); | ||
cms::cuda::copyAsync(view_h->global_yy_h, global_yy_d, nClusters_h, stream); | ||
cms::cuda::copyAsync(view_h->global_yz_h, global_yz_d, nClusters_h, stream); | ||
cms::cuda::copyAsync(view_h->global_zz_h, global_zz_d, nClusters_h, stream); | ||
|
||
cms::cuda::copyAsync(view_h->layer_h, layer_d, nClusters_h, stream); | ||
|
||
return view_h; | ||
} |
Oops, something went wrong.