Skip to content

Commit

Permalink
rebase to 12_0_X
Browse files Browse the repository at this point in the history
  • Loading branch information
Dan Riley authored and Dan Riley committed Jul 6, 2021
1 parent 1d915f0 commit 9735bff
Show file tree
Hide file tree
Showing 9 changed files with 558 additions and 0 deletions.
144 changes: 144 additions & 0 deletions CUDADataFormats/SiStripCluster/interface/MkFitSiStripClustersCUDA.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
#ifndef CUDADataFormats_SiStripCluster_interface_MkFitSiStripClustersCUDA_h
#define CUDADataFormats_SiStripCluster_interface_MkFitSiStripClustersCUDA_h

#include "CUDADataFormats/SiStripCluster/interface/GPUtypes.h"

#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h"
#include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h"
#include "HeterogeneousCore/CUDAUtilities/interface/cudaCompat.h"

#include <cuda_runtime.h>

class MkFitSiStripClustersCUDA {
public:
MkFitSiStripClustersCUDA() = default;
explicit MkFitSiStripClustersCUDA(size_t maxClusters, int clustersPerStrip, cudaStream_t stream);
~MkFitSiStripClustersCUDA() = default;

MkFitSiStripClustersCUDA(const MkFitSiStripClustersCUDA &) = delete;
MkFitSiStripClustersCUDA &operator=(const MkFitSiStripClustersCUDA &) = delete;
MkFitSiStripClustersCUDA(MkFitSiStripClustersCUDA &&) = default;
MkFitSiStripClustersCUDA &operator=(MkFitSiStripClustersCUDA &&) = default;

void setNClusters(uint32_t nClusters) { nClusters_h = nClusters; }

uint32_t nClusters() const { return nClusters_h; }

class GlobalDeviceView {
public:
// __device__ __forceinline__ float local_xx(int i) const { return __ldg(local_xx_ + i); }
// __device__ __forceinline__ float local_xy(int i) const { return __ldg(local_xy_ + i); }
// __device__ __forceinline__ float local_yy(int i) const { return __ldg(local_yy_ + i); }
// __device__ __forceinline__ float local(int i) const { return __ldg(local_ + i); }
__device__ __forceinline__ float global_x(int i) const { return __ldg(global_x_ + i); }
__device__ __forceinline__ float global_y(int i) const { return __ldg(global_y_ + i); }
__device__ __forceinline__ float global_z(int i) const { return __ldg(global_z_ + i); }

__device__ __forceinline__ float global_xx(int i) const { return __ldg(global_xx_ + i); }
__device__ __forceinline__ float global_xy(int i) const { return __ldg(global_xy_ + i); }
__device__ __forceinline__ float global_xz(int i) const { return __ldg(global_xz_ + i); }
__device__ __forceinline__ float global_yy(int i) const { return __ldg(global_yy_ + i); }
__device__ __forceinline__ float global_yz(int i) const { return __ldg(global_yz_ + i); }
__device__ __forceinline__ float global_zz(int i) const { return __ldg(global_zz_ + i); }

__device__ __forceinline__ short layer(int i) const { return __ldg(layer_ + i); }
__device__ __forceinline__ float charge(int i) const { return __ldg(charge_ + i); }
// __device__ __forceinline__ stripgpu::detId_t clusterDetId(int i) const { return __ldg(clusterDetId_ + i); }
// __device__ __forceinline__ uint32_t clusterIndex(int i) const { return __ldg(clusterIndex_ + i); }
// __device__ __forceinline__ uint8_t clusterADCs(int i) const { return __ldg(clusterADCs_ + i); }
__device__ __forceinline__ stripgpu::stripId_t firstStrip(int i) const { return __ldg(firstStrip_ + i); }
__device__ __forceinline__ uint32_t clusterSize(int i) const { return __ldg(clusterSize_ + i); }

friend MkFitSiStripClustersCUDA;

// private:
int nClusters_;

// float *local_xx_;
// float *local_xy_;
// float *local_yy_;
// float *local_;
float *global_x_;
float *global_y_;
float *global_z_;

float *global_xx_;
float *global_xy_;
float *global_xz_;
float *global_yy_;
float *global_yz_;
float *global_zz_;

short *layer_;
float *charge_;
stripgpu::detId_t *clusterDetId_;
// uint32_t *clusterIndex_;
//uint8_t *clusterADCs_;
stripgpu::stripId_t *firstStrip_;
uint32_t *clusterSize_;
};

GlobalDeviceView *gview() const { return gview_d.get(); }

class HostView {
public:
explicit HostView(size_t maxClusters, int clustersPerStrip, cudaStream_t stream);

cms::cuda::host::unique_ptr<stripgpu::detId_t[]> clusterDetId_h;
// cms::cuda::host::unique_ptr<uint32_t[]> clusterIndex_h;
cms::cuda::host::unique_ptr<float[]> charge_h;
//cms::cuda::host::unique_ptr<uint8_t[]> clusterADCs_h;
cms::cuda::host::unique_ptr<uint32_t[]> clusterSize_h;
cms::cuda::host::unique_ptr<stripgpu::stripId_t[]> firstStrip_h;

// cms::cuda::host::unique_ptr<float[]> local_xx_h;
// cms::cuda::host::unique_ptr<float[]> local_xy_h;
// cms::cuda::host::unique_ptr<float[]> local_yy_h;
// cms::cuda::host::unique_ptr<float[]> local_h;
cms::cuda::host::unique_ptr<float[]> global_x_h;
cms::cuda::host::unique_ptr<float[]> global_y_h;
cms::cuda::host::unique_ptr<float[]> global_z_h;
cms::cuda::host::unique_ptr<float[]> global_xx_h;
cms::cuda::host::unique_ptr<float[]> global_xy_h;
cms::cuda::host::unique_ptr<float[]> global_xz_h;
cms::cuda::host::unique_ptr<float[]> global_yy_h;
cms::cuda::host::unique_ptr<float[]> global_yz_h;
cms::cuda::host::unique_ptr<float[]> global_zz_h;

cms::cuda::host::unique_ptr<short[]> layer_h;
int nClusters_h;
};

std::unique_ptr<HostView> hostView(int clustersPerStrip, cudaStream_t stream) const;

private:
cms::cuda::device::unique_ptr<stripgpu::detId_t[]> clusterDetId_d;
// cms::cuda::device::unique_ptr<uint32_t[]> clusterIndex_d;
cms::cuda::device::unique_ptr<float[]> charge_d;
//cms::cuda::device::unique_ptr<uint8_t[]> clusterADCs_d;
cms::cuda::device::unique_ptr<uint32_t[]> clusterSize_d;
cms::cuda::device::unique_ptr<stripgpu::stripId_t[]> firstStrip_d;

// cms::cuda::device::unique_ptr<float[]> local_xx_d;
// cms::cuda::device::unique_ptr<float[]> local_xy_d;
// cms::cuda::device::unique_ptr<float[]> local_yy_d;
// cms::cuda::device::unique_ptr<float[]> local_d;
cms::cuda::device::unique_ptr<float[]> global_x_d;
cms::cuda::device::unique_ptr<float[]> global_y_d;
cms::cuda::device::unique_ptr<float[]> global_z_d;
cms::cuda::device::unique_ptr<float[]> global_xx_d;
cms::cuda::device::unique_ptr<float[]> global_xy_d;
cms::cuda::device::unique_ptr<float[]> global_xz_d;
cms::cuda::device::unique_ptr<float[]> global_yy_d;
cms::cuda::device::unique_ptr<float[]> global_yz_d;
cms::cuda::device::unique_ptr<float[]> global_zz_d;

cms::cuda::device::unique_ptr<short[]> layer_d;

cms::cuda::device::unique_ptr<GlobalDeviceView> gview_d; // "me" pointer

public:
int nClusters_h;
};

#endif
110 changes: 110 additions & 0 deletions CUDADataFormats/SiStripCluster/src/MkFitSiStripClustersCUDA.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
#include "CUDADataFormats/SiStripCluster/interface/MkFitSiStripClustersCUDA.h"
#include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h"

MkFitSiStripClustersCUDA::MkFitSiStripClustersCUDA(size_t maxClusters, int clustersPerStrip, cudaStream_t stream) {
clusterDetId_d = cms::cuda::make_device_unique<stripgpu::detId_t[]>(maxClusters, stream);
// clusterIndex_d = cms::cuda::make_device_unique<uint32_t[]>(maxClusters, stream);
clusterSize_d = cms::cuda::make_device_unique<uint32_t[]>(maxClusters, stream);
charge_d = cms::cuda::make_device_unique<float[]>(maxClusters, stream);
// clusterADCs_d = cms::cuda::make_device_unique<uint8_t[]>(maxClusters * clustersPerStrip, stream);
firstStrip_d = cms::cuda::make_device_unique<stripgpu::stripId_t[]>(maxClusters, stream);

// local_xx_d = cms::cuda::make_device_unique<float[]>(maxClusters, stream);
// local_xy_d = cms::cuda::make_device_unique<float[]>(maxClusters, stream);
// local_yy_d = cms::cuda::make_device_unique<float[]>(maxClusters, stream);
// local_d = cms::cuda::make_device_unique<float[]>(maxClusters, stream);
global_x_d = cms::cuda::make_device_unique<float[]>(maxClusters, stream);
global_y_d = cms::cuda::make_device_unique<float[]>(maxClusters, stream);
global_z_d = cms::cuda::make_device_unique<float[]>(maxClusters, stream);
global_xx_d = cms::cuda::make_device_unique<float[]>(maxClusters, stream);
global_xy_d = cms::cuda::make_device_unique<float[]>(maxClusters, stream);
global_xz_d = cms::cuda::make_device_unique<float[]>(maxClusters, stream);
global_yy_d = cms::cuda::make_device_unique<float[]>(maxClusters, stream);
global_yz_d = cms::cuda::make_device_unique<float[]>(maxClusters, stream);
global_zz_d = cms::cuda::make_device_unique<float[]>(maxClusters, stream);

layer_d = cms::cuda::make_device_unique<short[]>(maxClusters, stream);

auto gview = cms::cuda::make_host_unique<GlobalDeviceView>(stream);
// gview->local_xx_ = local_xx_d.get();
// gview->local_xy_ = local_xy_d.get();
// gview->local_yy_ = local_yy_d.get();
// gview->local_ = local_d.get();
gview->global_x_ = global_x_d.get();
gview->global_y_ = global_y_d.get();
gview->global_z_ = global_z_d.get();
gview->global_xx_ = global_xx_d.get();
gview->global_xy_ = global_xy_d.get();
gview->global_xz_ = global_xz_d.get();
gview->global_yy_ = global_yy_d.get();
gview->global_yz_ = global_yz_d.get();
gview->global_zz_ = global_zz_d.get();
gview->charge_ = charge_d.get();
gview->clusterDetId_ = clusterDetId_d.get();
// gview->clusterIndex_ = clusterIndex_d.get();
//gview->clusterADCs_ = clusterADCs_d.get();
gview->firstStrip_ = firstStrip_d.get();
gview->clusterSize_ = clusterSize_d.get();

gview->layer_ = layer_d.get();

gview_d = cms::cuda::make_device_unique<GlobalDeviceView>(stream);
cms::cuda::copyAsync(gview_d, gview, stream);
}

MkFitSiStripClustersCUDA::HostView::HostView(size_t maxClusters, int clustersPerStrip, cudaStream_t stream) {
clusterDetId_h = cms::cuda::make_host_unique<stripgpu::detId_t[]>(maxClusters, stream);
// clusterIndex_h = cms::cuda::make_host_unique<uint32_t[]>(maxClusters, stream);
charge_h = cms::cuda::make_host_unique<float[]>(maxClusters, stream);
// clusterADCs_h = cms::cuda::make_host_unique<uint8_t[]>(maxClusters * clustersPerStrip, stream);
firstStrip_h = cms::cuda::make_host_unique<stripgpu::stripId_t[]>(maxClusters, stream);
clusterSize_h = cms::cuda::make_host_unique<uint32_t[]>(maxClusters, stream);

// local_xx_h = cms::cuda::make_host_unique<float[]>(maxClusters, stream);
// local_xy_h = cms::cuda::make_host_unique<float[]>(maxClusters, stream);
// local_yy_h = cms::cuda::make_host_unique<float[]>(maxClusters, stream);
// local_h = cms::cuda::make_host_unique<float[]>(maxClusters, stream);
global_x_h = cms::cuda::make_host_unique<float[]>(maxClusters, stream);
global_y_h = cms::cuda::make_host_unique<float[]>(maxClusters, stream);
global_z_h = cms::cuda::make_host_unique<float[]>(maxClusters, stream);
global_xx_h = cms::cuda::make_host_unique<float[]>(maxClusters, stream);
global_xy_h = cms::cuda::make_host_unique<float[]>(maxClusters, stream);
global_xz_h = cms::cuda::make_host_unique<float[]>(maxClusters, stream);
global_yy_h = cms::cuda::make_host_unique<float[]>(maxClusters, stream);
global_yz_h = cms::cuda::make_host_unique<float[]>(maxClusters, stream);
global_zz_h = cms::cuda::make_host_unique<float[]>(maxClusters, stream);

layer_h = cms::cuda::make_host_unique<short[]>(maxClusters, stream);

nClusters_h = maxClusters;
}

std::unique_ptr<MkFitSiStripClustersCUDA::HostView> MkFitSiStripClustersCUDA::hostView(int clustersPerStrip,
cudaStream_t stream) const {
auto view_h = std::make_unique<HostView>(nClusters_h, clustersPerStrip, stream);

cms::cuda::copyAsync(view_h->clusterDetId_h, clusterDetId_d, nClusters_h, stream);
// cms::cuda::copyAsync(view_h->clusterIndex_h, clusterIndex_d, nClusters_h, stream);
cms::cuda::copyAsync(view_h->charge_h, charge_d, nClusters_h, stream);
// cms::cuda::copyAsync(view_h->clusterADCs_h, clusterADCs_d, nClusters_h * clustersPerStrip, stream);
cms::cuda::copyAsync(view_h->firstStrip_h, firstStrip_d, nClusters_h, stream);
cms::cuda::copyAsync(view_h->clusterSize_h, clusterSize_d, nClusters_h, stream);

// cms::cuda::copyAsync(view_h->local_xx_h, local_xx_d, nClusters_h, stream);
// cms::cuda::copyAsync(view_h->local_xy_h, local_xy_d, nClusters_h, stream);
// cms::cuda::copyAsync(view_h->local_yy_h, local_yy_d, nClusters_h, stream);
// cms::cuda::copyAsync(view_h->local_h, local_d, nClusters_h, stream);
cms::cuda::copyAsync(view_h->global_x_h, global_x_d, nClusters_h, stream);
cms::cuda::copyAsync(view_h->global_y_h, global_y_d, nClusters_h, stream);
cms::cuda::copyAsync(view_h->global_z_h, global_z_d, nClusters_h, stream);
cms::cuda::copyAsync(view_h->global_xx_h, global_xx_d, nClusters_h, stream);
cms::cuda::copyAsync(view_h->global_xy_h, global_xy_d, nClusters_h, stream);
cms::cuda::copyAsync(view_h->global_xz_h, global_xz_d, nClusters_h, stream);
cms::cuda::copyAsync(view_h->global_yy_h, global_yy_d, nClusters_h, stream);
cms::cuda::copyAsync(view_h->global_yz_h, global_yz_d, nClusters_h, stream);
cms::cuda::copyAsync(view_h->global_zz_h, global_zz_d, nClusters_h, stream);

cms::cuda::copyAsync(view_h->layer_h, layer_d, nClusters_h, stream);

return view_h;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#ifndef RecoLocalTracker_Records_SiStripClusterizerGPUConditionsRcd_h
#define RecoLocalTracker_Records_SiStripClusterizerGPUConditionsRcd_h
#include "FWCore/Framework/interface/DependentRecordImplementation.h"
#include "FWCore/Utilities/interface/mplVector.h"

#include "CalibTracker/Records/interface/SiStripDependentRecords.h"

class SiStripClusterizerGPUConditionsRcd : public edm::eventsetup::DependentRecordImplementation<
SiStripClusterizerGPUConditionsRcd,
edm::mpl::Vector<SiStripGainRcd, SiStripNoisesRcd, SiStripQualityRcd>> {
};

#endif // RecoLocalTracker_Records_SiStripClusterizerGPUConditionsRcd_h
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#include "RecoLocalTracker/Records/interface/SiStripClusterizerGPUConditionsRcd.h"

#include "FWCore/Framework/interface/eventsetuprecord_registration_macro.h"
EVENTSETUP_RECORD_REG(SiStripClusterizerGPUConditionsRcd);
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#include "FWCore/Utilities/interface/typelookup.h"

#include "SiStripConditionsGPUWrapper.h"

TYPELOOKUP_DATA_REG(SiStripConditionsGPUWrapper);
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
/**\class SiStripClusterizerGPUConditionsESProducer
*
* Create a GPU cache object for fast access to conditions needed by the SiStrip clusterizer
*
* @see SiStripClusterizerConditions
*/
#include <memory>

#include "FWCore/Framework/interface/ModuleFactory.h"
#include "FWCore/Framework/interface/ESProducer.h"
#include "FWCore/Framework/interface/ESHandle.h"
#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h"
#include "FWCore/ParameterSet/interface/ParameterSetDescription.h"

#include "RecoLocalTracker/Records/interface/SiStripClusterizerGPUConditionsRcd.h"

#include "CalibFormats/SiStripObjects/interface/SiStripGain.h"
#include "CalibFormats/SiStripObjects/interface/SiStripDetCabling.h"

#include "SiStripConditionsGPUWrapper.h"

class SiStripClusterizerGPUConditionsESProducer : public edm::ESProducer {
public:
SiStripClusterizerGPUConditionsESProducer(const edm::ParameterSet&);
~SiStripClusterizerGPUConditionsESProducer() override {}

static void fillDescriptions(edm::ConfigurationDescriptions& descriptions);

using ReturnType = std::unique_ptr<SiStripConditionsGPUWrapper>;
ReturnType produce(const SiStripClusterizerGPUConditionsRcd&);

private:
edm::ESGetToken<SiStripGain, SiStripGainRcd> m_gainToken;
edm::ESGetToken<SiStripNoises, SiStripNoisesRcd> m_noisesToken;
edm::ESGetToken<SiStripQuality, SiStripQualityRcd> m_qualityToken;
};

SiStripClusterizerGPUConditionsESProducer::SiStripClusterizerGPUConditionsESProducer(const edm::ParameterSet& iConfig) {
auto cc = setWhatProduced(this, iConfig.getParameter<std::string>("Label"));

m_gainToken = cc.consumesFrom<SiStripGain, SiStripGainRcd>();
m_noisesToken = cc.consumesFrom<SiStripNoises, SiStripNoisesRcd>();
m_qualityToken = cc.consumesFrom<SiStripQuality, SiStripQualityRcd>(
edm::ESInputTag{"", iConfig.getParameter<std::string>("QualityLabel")});
}

void SiStripClusterizerGPUConditionsESProducer::fillDescriptions(edm::ConfigurationDescriptions& descriptions) {
edm::ParameterSetDescription desc;
desc.add<std::string>("QualityLabel", "");
desc.add<std::string>("Label", "");
descriptions.add("SiStripClusterizerGPUConditionsESProducer", desc);
}

SiStripClusterizerGPUConditionsESProducer::ReturnType SiStripClusterizerGPUConditionsESProducer::produce(
const SiStripClusterizerGPUConditionsRcd& iRecord) {
auto gainsH = iRecord.getTransientHandle(m_gainToken);
const auto& noises = iRecord.get(m_noisesToken);
const auto& quality = iRecord.get(m_qualityToken);

return std::make_unique<SiStripConditionsGPUWrapper>(quality, gainsH.product(), noises);
}

DEFINE_FWK_EVENTSETUP_MODULE(SiStripClusterizerGPUConditionsESProducer);
Loading

0 comments on commit 9735bff

Please sign in to comment.