-
Notifications
You must be signed in to change notification settings - Fork 4.4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fully implement fishbone in patatrack #36215
Changes from all commits
67c26fa
654223b
6e3c160
69c6015
626f6fe
6f4b794
fbda253
f76c7af
78b8be1
a5b3e17
8f11205
8d9e72d
088654a
a4a692e
3f4ba47
88cbddc
1c58efd
0842c63
4afbf2c
b2bdc8a
ac07875
eaa9260
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -138,6 +138,7 @@ int main() { | |
go<5>(true); | ||
go<6>(true); | ||
|
||
// go<10>(); | ||
go<10>(false); | ||
go<10>(true); | ||
return 0; | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,103 @@ | ||
#include "DataFormats/Math/interface/choleskyInversion.h" | ||
|
||
using namespace math::cholesky; | ||
|
||
#include <Eigen/Core> | ||
#include <Eigen/Eigenvalues> | ||
|
||
#include <iomanip> | ||
#include <memory> | ||
#include <algorithm> | ||
#include <chrono> | ||
#include <random> | ||
|
||
#include <cassert> | ||
#include <iostream> | ||
#include <limits> | ||
|
||
#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" | ||
#include "HeterogeneousCore/CUDAUtilities/interface/requireDevices.h" | ||
|
||
// Kernel: each thread with a global index below n inverts one matrix of the
// mm array in place via invertNN, printing three of its elements before and
// after the inversion. N is only used to tag the printed lines.
template <typename M, int N>
__global__ void invert(M* mm, int n) {
  // flat 1D global thread index
  const auto idx = threadIdx.x + blockDim.x * blockIdx.x;

  // threads past the end of the array have nothing to do
  if (idx < n) {
    M& mat = mm[idx];

    printf("before %d %f %f %f\n", N, mat(0, 0), mat(1, 0), mat(1, 1));

    // source and destination are the same object: in-place inversion
    invertNN(mat, mat);

    printf("after %d %f %f %f\n", N, mat(0, 0), mat(1, 0), mat(1, 1));
  }
}
|
||
// Kernel: same layout as invert, but the inversion is done through the
// matrix type's own inverse() member. Each thread with a global index below n
// handles one matrix and prints three of its elements before and after.
template <typename M, int N>
__global__ void invertE(M* mm, int n) {
  // flat 1D global thread index
  const auto idx = threadIdx.x + blockDim.x * blockIdx.x;

  // threads past the end of the array have nothing to do
  if (idx < n) {
    M& mat = mm[idx];

    printf("before %d %f %f %f\n", N, mat(0, 0), mat(1, 0), mat(1, 1));

    // overwrite the matrix with its inverse
    mat = mat.inverse();

    printf("after %d %f %f %f\n", N, mat(0, 0), mat(1, 0), mat(1, 1));
  }
}
|
||
// generate matrices | ||
// Fill the square matrix m with a symmetric pseudo-random pattern.
//
// M must expose a compile-time ColsAtCompileTime and element access through
// m(row, col). The random engine is default-constructed on every call, so the
// generated values are the same from one call to the next.
template <class M>
void genMatrix(M& m) {
  using T = typename std::remove_reference<decltype(m(0, 0))>::type;
  int n = M::ColsAtCompileTime;
  std::mt19937 eng;
  // std::mt19937 eng2;
  std::uniform_real_distribution<T> rgen(0., 1.);

  // generate the diagonal elements first
  for (int i = 0; i < n; ++i) {
    // The upper bound grows with the row index (integer arithmetic, as before);
    // max condition is 10^4. A 1x1 matrix has no spread to distribute, and
    // i * 10000 / (n - 1) would be an integer division by zero, so its single
    // element simply uses an upper bound of 1.
    double maxVal = (n > 1 ? i * 10000 / (n - 1) : 0) + 1;
    m(i, i) = maxVal * rgen(eng);
  }

  // then the off-diagonal elements, mirrored so that the matrix is symmetric
  for (int i = 0; i < n; ++i) {
    for (int j = 0; j < i; ++j) {
      // bounded by 0.3 * sqrt(m_ii * m_jj); intended to keep the matrix positive definite
      double v = 0.3 * std::sqrt(m(i, i) * m(j, j));
      m(i, j) = v * rgen(eng);
      m(j, i) = m(i, j);
    }
  }
}
|
||
// Square double-precision Eigen matrix of compile-time size DIM x DIM.
template <int DIM>
using MXN = Eigen::Matrix<double, DIM, DIM>;

// Test driver: generates one 6x6 matrix, inverts it in place on the CPU with
// invertNN, then copies the resulting matrix to the GPU and runs the invert
// and invertE kernels on it one after the other, each with a single thread.
// Every CUDA runtime call and kernel launch goes through cudaCheck
// (from cudaCheck.h) so that a failure is not silently ignored.
int main() {
  cms::cudatest::requireDevices();

  constexpr int DIM = 6;
  using M = MXN<DIM>;

  M m;
  genMatrix(m);

  printf("on CPU before %d %f %f %f\n", DIM, m(0, 0), m(1, 0), m(1, 1));

  invertNN(m, m);

  printf("on CPU after %d %f %f %f\n", DIM, m(0, 0), m(1, 0), m(1, 1));

  // Device copy of the (already CPU-inverted) matrix; typed as M* so that no
  // cast is needed at the launch sites.
  M* d = nullptr;
  cudaCheck(cudaMalloc(&d, sizeof(M)));
  cudaCheck(cudaMemcpy(d, &m, sizeof(M), cudaMemcpyHostToDevice));

  invert<M, DIM><<<1, 1>>>(d, 1);
  cudaCheck(cudaGetLastError());       // launch-time errors
  cudaCheck(cudaDeviceSynchronize());  // execution errors

  invertE<M, DIM><<<1, 1>>>(d, 1);
  cudaCheck(cudaGetLastError());
  cudaCheck(cudaDeviceSynchronize());

  // release the device buffer
  cudaCheck(cudaFree(d));

  return 0;
}
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -27,17 +27,20 @@ namespace phase1PixelTopology { | |
|
||
constexpr uint32_t numberOfModules = 1856; | ||
constexpr uint32_t numberOfLayers = 10; | ||
constexpr uint32_t layerStart[numberOfLayers + 1] = {0, | ||
96, | ||
320, | ||
672, // barrel | ||
1184, | ||
1296, | ||
1408, // positive endcap | ||
1520, | ||
1632, | ||
1744, // negative endcap | ||
numberOfModules}; | ||
#ifdef __CUDA_ARCH__ | ||
__device__ | ||
#endif | ||
Comment on lines
+30
to
+32
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Looks like we should "abstract" this approach and make it easier to declare Of course this PR is ok as it is, I'm just thinking how we can improve things later. Maybe something like #ifdef __CUDA_ARCH__
#define HOST_DEVICE_AGGREGATE_CONSTANT __device__ constexpr
#else
#define HOST_DEVICE_AGGREGATE_CONSTANT constexpr
#endif ? Or maybe a simpler name :-) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I did not want to introduce dependencies in this file at this moment (it is also used in code that does not depend on Heterogeneous) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Opened #36311 to remember about this. |
||
constexpr uint32_t layerStart[numberOfLayers + 1] = {0, | ||
96, | ||
320, | ||
672, // barrel | ||
1184, | ||
1296, | ||
1408, // positive endcap | ||
1520, | ||
1632, | ||
1744, // negative endcap | ||
numberOfModules}; | ||
constexpr char const* layerName[numberOfLayers] = { | ||
"BL1", | ||
"BL2", | ||
|
@@ -84,8 +87,8 @@ namespace phase1PixelTopology { | |
|
||
constexpr uint32_t maxModuleStride = findMaxModuleStride(); | ||
|
||
constexpr uint8_t findLayer(uint32_t detId) { | ||
for (uint8_t i = 0; i < std::size(layerStart); ++i) | ||
constexpr uint8_t findLayer(uint32_t detId, uint8_t sl = 0) { | ||
for (uint8_t i = sl; i < std::size(layerStart); ++i) | ||
if (detId < layerStart[i + 1]) | ||
return i; | ||
return std::size(layerStart); | ||
|
@@ -100,7 +103,15 @@ namespace phase1PixelTopology { | |
} | ||
|
||
constexpr uint32_t layerIndexSize = numberOfModules / maxModuleStride; | ||
constexpr std::array<uint8_t, layerIndexSize> layer = map_to_array<layerIndexSize>(findLayerFromCompact); | ||
#ifdef __CUDA_ARCH__ | ||
__device__ | ||
#endif | ||
constexpr std::array<uint8_t, layerIndexSize> | ||
layer = map_to_array<layerIndexSize>(findLayerFromCompact); | ||
|
||
constexpr uint8_t getLayer(uint32_t detId) { | ||
return phase1PixelTopology::layer[detId / phase1PixelTopology::maxModuleStride]; | ||
} | ||
|
||
constexpr bool validateLayerIndex() { | ||
bool res = true; | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
how strict are the rules for the dependencies in the CUDADataFormats?
@makortel please clarify.
Here, a dependence on
Geometry/TrackerGeometryBuilder
seems somewhat out of order. There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The
CUDADataFormats
fall in the same category as "transient products". There is already a dependence here
cmssw/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DSOAView.h
Line 9 in df8ebdd
so this PR is not making things (much) worse.
But probably (after this PR) it would be necessary to think again its placement, because in the long term I'd assume classes using the
phase1PixelTopology
to be persisted (at which point the relevant classes should be defined in DataFormats
).There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Dependency on Geometry is allowed in DataFormat since ages
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
DataFormats
are allowed to depend on some subpackages of Geometry
, but not everything. Currently the set is limited to Geometry/CaloGeometry
Geometry/CommonDetUnit
Geometry/CommonTopologies
Geometry/GEMGeometry
(from FWLite build set).
Geometry/TrackerGeometryBuilder
itself would bring in DetectorDescription/Core
DetectorDescription/DDCMS
CondFormats/GeometryObjects
FWCore/ParameterSet
(although this might be possible to remove?) dd4hep
These dependencies would be too much for
DataFormats
(although there is already one case depending on FWCore/ParameterSet
). There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
for what I am concerned
phase1PixelTopology.h
can move anywhere else: the whole idea is to have geometry constants as constants, not as database or DD quantities! Moving elsewhere will need changes in many packages. I propose to open an issue and not hold this specific PR.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Agreed, I was trying to think for longer term.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I opened an issue #36302