Merge pull request #36215 from VinInn/ImproveFishbone2
Fully implement fishbone in patatrack
cmsbuild authored Dec 1, 2021
2 parents bc75e59 + eaa9260 commit 94464ad
Showing 27 changed files with 686 additions and 338 deletions.
19 changes: 19 additions & 0 deletions CUDADataFormats/Track/interface/TrackSoAHeterogeneousT.h
@@ -5,6 +5,7 @@
#include <algorithm>

#include "CUDADataFormats/Track/interface/TrajectoryStateSoAT.h"
#include "Geometry/TrackerGeometryBuilder/interface/phase1PixelTopology.h"
#include "HeterogeneousCore/CUDAUtilities/interface/HistoContainer.h"

#include "CUDADataFormats/Common/interface/HeterogeneousSoA.h"
@@ -42,8 +43,26 @@ class TrackSoAHeterogeneousT {
// this is chi2/ndof as not necessarily all hits are used in the fit
eigenSoA::ScalarSoA<float, S> chi2;

eigenSoA::ScalarSoA<int8_t, S> nLayers;

constexpr int nHits(int i) const { return detIndices.size(i); }

constexpr bool isTriplet(int i) const { return nLayers(i) == 3; }

constexpr int computeNumberOfLayers(int32_t i) const {
// layers are in order and we assume tracks are either forward or backward
auto pdet = detIndices.begin(i);
int nl = 1;
auto ol = phase1PixelTopology::getLayer(*pdet);
for (; pdet < detIndices.end(i); ++pdet) {
auto il = phase1PixelTopology::getLayer(*pdet);
if (il != ol)
++nl;
ol = il;
}
return nl;
}

// State at the Beam spot
// phi,tip,1/pt,cotan(theta),zip
TrajectoryStateSoAT<S> stateAtBS;
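For reference, a minimal standalone sketch of the layer-counting idea behind computeNumberOfLayers above: hits are stored in detector-index order, so the number of layers is one plus the number of adjacent layer changes. toyGetLayer, countLayers and the sample values are illustrative stand-ins, not part of this commit.

#include <cassert>
#include <cstdint>

// stand-in for phase1PixelTopology::getLayer: here a "det index" is already a layer id
inline uint8_t toyGetLayer(uint32_t detIndex) { return static_cast<uint8_t>(detIndex); }

// same counting idea as computeNumberOfLayers: hits are in det-index order,
// so the number of layers is 1 plus the number of adjacent layer changes
int countLayers(const uint32_t* begin, const uint32_t* end) {
  int nl = 1;
  auto ol = toyGetLayer(*begin);
  for (auto p = begin; p < end; ++p) {
    auto il = toyGetLayer(*p);
    if (il != ol)
      ++nl;
    ol = il;
  }
  return nl;
}

int main() {
  const uint32_t hits[] = {0, 1, 1, 2};      // four hits, two on the same layer
  assert(countLayers(hits, hits + 4) == 3);  // 3 distinct layers -> an isTriplet() track
  return 0;
}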
87 changes: 78 additions & 9 deletions DataFormats/Math/interface/choleskyInversion.h
@@ -5,7 +5,78 @@

#include <Eigen/Core>

/**
namespace math {
namespace cholesky {

template <typename M1, typename M2, int N = M2::ColsAtCompileTime>
// without this: either does not compile or compiles and then fails silently at runtime
#ifdef __CUDACC__
__host__ __device__
#endif
inline constexpr void
invertNN(M1 const& src, M2& dst) {

// origin: CERNLIB

using T = typename M2::Scalar;

T a[N][N];
for (int i = 0; i < N; ++i) {
a[i][i] = src(i, i);
for (int j = i + 1; j < N; ++j)
a[j][i] = src(i, j);
}

for (int j = 0; j < N; ++j) {
a[j][j] = T(1.) / a[j][j];
int jp1 = j + 1;
for (int l = jp1; l < N; ++l) {
a[j][l] = a[j][j] * a[l][j];
T s1 = -a[l][jp1];
for (int i = 0; i < jp1; ++i)
s1 += a[l][i] * a[i][jp1];
a[l][jp1] = -s1;
}
}

if constexpr (N == 1) {
dst(0, 0) = a[0][0];
return;
}
a[0][1] = -a[0][1];
a[1][0] = a[0][1] * a[1][1];
for (int j = 2; j < N; ++j) {
int jm1 = j - 1;
for (int k = 0; k < jm1; ++k) {
T s31 = a[k][j];
for (int i = k; i < jm1; ++i)
s31 += a[k][i + 1] * a[i + 1][j];
a[k][j] = -s31;
a[j][k] = -s31 * a[j][j];
}
a[jm1][j] = -a[jm1][j];
a[j][jm1] = a[jm1][j] * a[j][j];
}

int j = 0;
while (j < N - 1) {
T s33 = a[j][j];
for (int i = j + 1; i < N; ++i)
s33 += a[j][i] * a[i][j];
dst(j, j) = s33;

++j;
for (int k = 0; k < j; ++k) {
T s32 = 0;
for (int i = j; i < N; ++i)
s32 += a[k][i] * a[i][j];
dst(k, j) = dst(j, k) = s32;
}
}
dst(j, j) = a[j][j];
}

/**
* fully inlined specialized code to perform the inversion of a
* positive defined matrix of rank up to 6.
*
@@ -16,8 +87,6 @@
*
*
*/
namespace math {
namespace cholesky {

template <typename M1, typename M2>
inline constexpr void __attribute__((always_inline)) invert11(M1 const& src, M2& dst) {
@@ -336,12 +405,12 @@ namespace math {
};

// Eigen interface
template <typename D1, typename D2>
inline constexpr void __attribute__((always_inline))
invert(Eigen::DenseBase<D1> const& src, Eigen::DenseBase<D2>& dst) {
using M1 = Eigen::DenseBase<D1>;
using M2 = Eigen::DenseBase<D2>;
Inverter<M1, M2, M2::ColsAtCompileTime>::eval(src, dst);
template <typename M1, typename M2>
inline constexpr void __attribute__((always_inline)) invert(M1 const& src, M2& dst) {
if constexpr (M2::ColsAtCompileTime < 7)
Inverter<M1, M2, M2::ColsAtCompileTime>::eval(src, dst);
else
invertNN(src, dst);
}

} // namespace cholesky
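Below is a hypothetical usage sketch of the reworked Eigen-facing entry point, not part of the commit: invert() now dispatches on M2::ColsAtCompileTime, using the hand-unrolled Inverter specializations for N < 7 and falling back to the generic CERNLIB-style invertNN() otherwise. It assumes the header builds as plain host C++ against Eigen; the matrix values are arbitrary symmetric positive-definite test data.

#include "DataFormats/Math/interface/choleskyInversion.h"

#include <Eigen/Core>
#include <iostream>

int main() {
  // small symmetric positive-definite matrix: N < 7 uses the unrolled Inverter specializations
  Eigen::Matrix<double, 3, 3> m3;
  m3 << 4., 1., 0.5,
        1., 3., 0.2,
        0.5, 0.2, 2.;
  Eigen::Matrix<double, 3, 3> i3;
  math::cholesky::invert(m3, i3);
  std::cout << "3x3: max |M*M^-1 - I| = "
            << (m3 * i3 - Eigen::Matrix<double, 3, 3>::Identity()).cwiseAbs().maxCoeff() << '\n';

  // for N >= 7 invert() now falls back to the generic invertNN()
  Eigen::Matrix<double, 8, 8> m8 = Eigen::Matrix<double, 8, 8>::Identity() * 2.;
  Eigen::Matrix<double, 8, 8> i8;
  math::cholesky::invert(m8, i8);
  std::cout << "8x8: i8(0,0) = " << i8(0, 0) << " (expect 0.5)\n";
  return 0;
}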
6 changes: 6 additions & 0 deletions DataFormats/Math/test/BuildFile.xml
@@ -127,4 +127,10 @@
<flags CUDA_FLAGS="-w"/>
</bin>

<bin file="simpleCholeskyTest.cu">
<use name="cuda"/>
<use name="HeterogeneousCore/CUDAUtilities"/>
<flags CUDA_FLAGS="-w"/>
</bin>

</iftool>
3 changes: 2 additions & 1 deletion DataFormats/Math/test/CholeskyInvert_t.cpp
@@ -138,6 +138,7 @@ int main() {
go<5>(true);
go<6>(true);

// go<10>();
go<10>(false);
go<10>(true);
return 0;
}
9 changes: 6 additions & 3 deletions DataFormats/Math/test/CholeskyInvert_t.cu
@@ -85,7 +85,8 @@ template <int N>
void go(bool soa) {
constexpr unsigned int DIM = N;
using MX = MXN<DIM>;
std::cout << "testing Matrix of dimension " << DIM << " size " << sizeof(MX) << std::endl;
std::cout << "testing Matrix of dimension " << DIM << " size " << sizeof(MX) << " in " << (soa ? "SOA" : "AOS")
<< " mode" << std::endl;

auto start = std::chrono::high_resolution_clock::now();
auto delta = start - start;
@@ -197,13 +198,15 @@ int main() {
go<4>(false);
go<5>(false);
go<6>(false);
go<7>(false);
go<10>(false);

go<2>(true);
go<3>(true);
go<4>(true);
go<5>(true);
go<6>(true);

// go<10>();
go<7>(true);
go<10>(true);
return 0;
}
103 changes: 103 additions & 0 deletions DataFormats/Math/test/simpleCholeskyTest.cu
@@ -0,0 +1,103 @@
#include "DataFormats/Math/interface/choleskyInversion.h"

using namespace math::cholesky;

#include <Eigen/Core>
#include <Eigen/Eigenvalues>

#include <iomanip>
#include <memory>
#include <algorithm>
#include <chrono>
#include <random>

#include <cassert>
#include <iostream>
#include <limits>

#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
#include "HeterogeneousCore/CUDAUtilities/interface/requireDevices.h"

template <typename M, int N>
__global__ void invert(M* mm, int n) {
auto i = blockIdx.x * blockDim.x + threadIdx.x;
if (i >= n)
return;

auto& m = mm[i];

printf("before %d %f %f %f\n", N, m(0, 0), m(1, 0), m(1, 1));

invertNN(m, m);

printf("after %d %f %f %f\n", N, m(0, 0), m(1, 0), m(1, 1));
}

template <typename M, int N>
__global__ void invertE(M* mm, int n) {
auto i = blockIdx.x * blockDim.x + threadIdx.x;
if (i >= n)
return;

auto& m = mm[i];

printf("before %d %f %f %f\n", N, m(0, 0), m(1, 0), m(1, 1));

m = m.inverse();

printf("after %d %f %f %f\n", N, m(0, 0), m(1, 0), m(1, 1));
}

// generate matrices
template <class M>
void genMatrix(M& m) {
using T = typename std::remove_reference<decltype(m(0, 0))>::type;
int n = M::ColsAtCompileTime;
std::mt19937 eng;
// std::mt19937 eng2;
std::uniform_real_distribution<T> rgen(0., 1.);

// first generate the diagonal elements
for (int i = 0; i < n; ++i) {
double maxVal = i * 10000 / (n - 1) + 1; // max condition is 10^4
m(i, i) = maxVal * rgen(eng);
}
for (int i = 0; i < n; ++i) {
for (int j = 0; j < i; ++j) {
double v = 0.3 * std::sqrt(m(i, i) * m(j, j)); // this makes the matrix pos defined
m(i, j) = v * rgen(eng);
m(j, i) = m(i, j);
}
}
}

template <int DIM>
using MXN = Eigen::Matrix<double, DIM, DIM>;

int main() {
cms::cudatest::requireDevices();

constexpr int DIM = 6;

using M = MXN<DIM>;

M m;

genMatrix(m);

printf("on CPU before %d %f %f %f\n", DIM, m(0, 0), m(1, 0), m(1, 1));

invertNN(m, m);

printf("on CPU after %d %f %f %f\n", DIM, m(0, 0), m(1, 0), m(1, 1));

double* d;
cudaMalloc(&d, sizeof(M));
cudaMemcpy(d, &m, sizeof(M), cudaMemcpyHostToDevice);
invert<M, DIM><<<1, 1>>>((M*)d, 1);
cudaCheck(cudaDeviceSynchronize());
invertE<M, DIM><<<1, 1>>>((M*)d, 1);
cudaCheck(cudaDeviceSynchronize());

return 0;
}
39 changes: 25 additions & 14 deletions Geometry/TrackerGeometryBuilder/interface/phase1PixelTopology.h
@@ -27,17 +27,20 @@ namespace phase1PixelTopology {

constexpr uint32_t numberOfModules = 1856;
constexpr uint32_t numberOfLayers = 10;
constexpr uint32_t layerStart[numberOfLayers + 1] = {0,
96,
320,
672, // barrel
1184,
1296,
1408, // positive endcap
1520,
1632,
1744, // negative endcap
numberOfModules};
#ifdef __CUDA_ARCH__
__device__
#endif
constexpr uint32_t layerStart[numberOfLayers + 1] = {0,
96,
320,
672, // barrel
1184,
1296,
1408, // positive endcap
1520,
1632,
1744, // negative endcap
numberOfModules};
constexpr char const* layerName[numberOfLayers] = {
"BL1",
"BL2",
@@ -84,8 +87,8 @@

constexpr uint32_t maxModuleStride = findMaxModuleStride();

constexpr uint8_t findLayer(uint32_t detId) {
for (uint8_t i = 0; i < std::size(layerStart); ++i)
constexpr uint8_t findLayer(uint32_t detId, uint8_t sl = 0) {
for (uint8_t i = sl; i < std::size(layerStart); ++i)
if (detId < layerStart[i + 1])
return i;
return std::size(layerStart);
@@ -100,7 +103,15 @@
}

constexpr uint32_t layerIndexSize = numberOfModules / maxModuleStride;
constexpr std::array<uint8_t, layerIndexSize> layer = map_to_array<layerIndexSize>(findLayerFromCompact);
#ifdef __CUDA_ARCH__
__device__
#endif
constexpr std::array<uint8_t, layerIndexSize>
layer = map_to_array<layerIndexSize>(findLayerFromCompact);

constexpr uint8_t getLayer(uint32_t detId) {
return phase1PixelTopology::layer[detId / phase1PixelTopology::maxModuleStride];
}

constexpr bool validateLayerIndex() {
bool res = true;
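And a small host-side sketch of the new getLayer() helper (hypothetical test code, not part of the commit), assuming the header compiles as plain host C++: getLayer() looks the layer up in the compact constexpr table and should agree with the linear findLayer() scan, since every layerStart boundary is a multiple of maxModuleStride.

#include "Geometry/TrackerGeometryBuilder/interface/phase1PixelTopology.h"

#include <cassert>
#include <cstdint>

int main() {
  using namespace phase1PixelTopology;
  // barrel layer 1 covers modules [0, 96), layer 2 starts at module 96 (see layerStart above)
  static_assert(getLayer(0) == 0);
  static_assert(getLayer(96) == 1);
  // the compact table and the linear scan must give the same answer for every module
  for (uint32_t detId = 0; detId < numberOfModules; ++detId)
    assert(findLayer(detId) == getLayer(detId));
  return 0;
}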
6 changes: 5 additions & 1 deletion Geometry/TrackerGeometryBuilder/test/BuildFile.xml
@@ -40,7 +40,11 @@
<flags EDM_PLUGIN="1"/>
</library>

<bin file="phase1PixelTopology_t.cpp"/>
<bin file="phase1PixelTopology_t.cu">
<use name="HeterogeneousCore/CUDAUtilities"/>
<use name="cuda"/>
<flags CUDA_FLAGS="-g -DGPU_DEBUG"/>
</bin>

<bin file="runTests.cpp" name="GeometryTrackerGeometryBuilderTestDriver">
<use name="FWCore/Utilities"/>