-
Notifications
You must be signed in to change notification settings - Fork 4.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fix a race condition in splitVertices #45656
Merged
Merged
Changes from all commits
Commits
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -19,30 +19,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder { | |
using WsSoAView = ::vertexFinder::PixelVertexWorkSpaceSoAView; | ||
template <typename TAcc> | ||
ALPAKA_FN_ACC ALPAKA_FN_INLINE __attribute__((always_inline)) void splitVertices(const TAcc& acc, | ||
VtxSoAView& pdata, | ||
WsSoAView& pws, | ||
VtxSoAView& data, | ||
WsSoAView& ws, | ||
float maxChi2) { | ||
constexpr bool verbose = false; // in principle the compiler should optimize out if false | ||
const uint32_t threadIdxLocal(alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[0u]); | ||
|
||
auto& __restrict__ data = pdata; | ||
auto& __restrict__ ws = pws; | ||
auto nt = ws.ntrks(); | ||
float const* __restrict__ zt = ws.zt(); | ||
float const* __restrict__ ezt2 = ws.ezt2(); | ||
float* __restrict__ zv = data.zv(); | ||
float* __restrict__ wv = data.wv(); | ||
float const* __restrict__ chi2 = data.chi2(); | ||
uint32_t& nvFinal = data.nvFinal(); | ||
|
||
int32_t const* __restrict__ nn = data.ndof(); | ||
int32_t* __restrict__ iv = ws.iv(); | ||
|
||
ALPAKA_ASSERT_ACC(zt); | ||
ALPAKA_ASSERT_ACC(wv); | ||
ALPAKA_ASSERT_ACC(chi2); | ||
ALPAKA_ASSERT_ACC(nn); | ||
Comment on lines -41 to -44
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. These are no longer needed, because the SoA View should guarantee that these are non-null. |
||
|
||
constexpr uint32_t MAXTK = 512; | ||
|
||
auto& it = alpaka::declareSharedVar<uint32_t[MAXTK], __COUNTER__>(acc); // track index | ||
|
@@ -51,32 +31,33 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder { | |
auto& ww = alpaka::declareSharedVar<float[MAXTK], __COUNTER__>(acc); // z weight | ||
auto& nq = alpaka::declareSharedVar<uint32_t, __COUNTER__>(acc); // number of track for this vertex | ||
|
||
const uint32_t blockIdx(alpaka::getIdx<alpaka::Grid, alpaka::Blocks>(acc)[0u]); | ||
const uint32_t gridDimension(alpaka::getWorkDiv<alpaka::Grid, alpaka::Blocks>(acc)[0u]); | ||
|
||
// one vertex per block | ||
for (auto kv = blockIdx; kv < nvFinal; kv += gridDimension) { | ||
if (nn[kv] < 4) | ||
for (auto kv : cms::alpakatools::independent_groups(acc, data.nvFinal())) { | ||
int32_t ndof = data[kv].ndof(); | ||
if (ndof < 4) | ||
continue; | ||
if (chi2[kv] < maxChi2 * float(nn[kv])) | ||
if (data[kv].chi2() < maxChi2 * float(ndof)) | ||
continue; | ||
|
||
ALPAKA_ASSERT_ACC(nn[kv] < int32_t(MAXTK)); | ||
ALPAKA_ASSERT_ACC(ndof < int32_t(MAXTK)); | ||
|
||
if ((uint32_t)nn[kv] >= MAXTK) | ||
if ((uint32_t)ndof >= MAXTK) | ||
continue; // too bad FIXME | ||
|
||
nq = 0u; | ||
if (cms::alpakatools::once_per_block(acc)) { | ||
// reset the number of tracks for the current vertex | ||
nq = 0u; | ||
} | ||
alpaka::syncBlockThreads(acc); | ||
|
||
// copy to local | ||
for (auto k : cms::alpakatools::independent_group_elements(acc, nt)) { | ||
if (iv[k] == int(kv)) { | ||
auto old = alpaka::atomicInc(acc, &nq, MAXTK, alpaka::hierarchy::Threads{}); | ||
zz[old] = zt[k] - zv[kv]; | ||
newV[old] = zz[old] < 0 ? 0 : 1; | ||
ww[old] = 1.f / ezt2[k]; | ||
it[old] = k; | ||
// cache the data of the tracks associated to the current vertex into shared memory | ||
for (auto k : cms::alpakatools::independent_group_elements(acc, ws.ntrks())) { | ||
if (ws[k].iv() == int(kv)) { | ||
auto index = alpaka::atomicInc(acc, &nq, MAXTK, alpaka::hierarchy::Threads{}); | ||
it[index] = k; | ||
zz[index] = ws[k].zt() - data[kv].zv(); | ||
newV[index] = zz[index] < 0 ? 0 : 1; | ||
ww[index] = 1.f / ws[k].ezt2(); | ||
} | ||
} | ||
|
||
|
@@ -85,14 +66,14 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder { | |
auto& wnew = alpaka::declareSharedVar<float[2], __COUNTER__>(acc); | ||
alpaka::syncBlockThreads(acc); | ||
|
||
ALPAKA_ASSERT_ACC(int(nq) == nn[kv] + 1); | ||
ALPAKA_ASSERT_ACC(int(nq) == ndof + 1); | ||
|
||
int maxiter = 20; | ||
// kt-min.... | ||
bool more = true; | ||
while (alpaka::syncBlockThreadsPredicate<alpaka::BlockOr>(acc, more)) { | ||
more = false; | ||
if (0 == threadIdxLocal) { | ||
if (cms::alpakatools::once_per_block(acc)) { | ||
znew[0] = 0; | ||
znew[1] = 0; | ||
wnew[0] = 0; | ||
|
@@ -107,7 +88,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder { | |
} | ||
alpaka::syncBlockThreads(acc); | ||
|
||
if (0 == threadIdxLocal) { | ||
if (cms::alpakatools::once_per_block(acc)) { | ||
znew[0] /= wnew[0]; | ||
znew[1] /= wnew[1]; | ||
} | ||
|
@@ -134,30 +115,34 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder { | |
|
||
auto chi2Dist = dist2 / (1.f / wnew[0] + 1.f / wnew[1]); | ||
|
||
if (verbose && 0 == threadIdxLocal) | ||
printf("inter %d %f %f\n", 20 - maxiter, chi2Dist, dist2 * wv[kv]); | ||
if constexpr (verbose) { | ||
if (cms::alpakatools::once_per_block(acc)) | ||
printf("inter %d %f %f\n", 20 - maxiter, chi2Dist, dist2 * data[kv].wv()); | ||
} | ||
|
||
if (chi2Dist < 4) | ||
continue; | ||
|
||
// get a new global vertex | ||
auto& igv = alpaka::declareSharedVar<uint32_t, __COUNTER__>(acc); | ||
if (0 == threadIdxLocal) | ||
if (cms::alpakatools::once_per_block(acc)) | ||
igv = alpaka::atomicAdd(acc, &ws.nvIntermediate(), 1u, alpaka::hierarchy::Blocks{}); | ||
alpaka::syncBlockThreads(acc); | ||
for (auto k : cms::alpakatools::uniform_elements(acc, nq)) { | ||
if (1 == newV[k]) | ||
iv[it[k]] = igv; | ||
ws[it[k]].iv() = igv; | ||
} | ||
|
||
// synchronise the threads before starting the next iteration of the loop over the vertices and resetting the shared memory | ||
alpaka::syncBlockThreads(acc); | ||
} // loop on vertices | ||
} | ||
|
||
class SplitVerticesKernel { | ||
public: | ||
template <typename TAcc> | ||
ALPAKA_FN_ACC void operator()(const TAcc& acc, VtxSoAView pdata, WsSoAView pws, float maxChi2) const { | ||
splitVertices(acc, pdata, pws, maxChi2); | ||
ALPAKA_FN_ACC void operator()(const TAcc& acc, VtxSoAView data, WsSoAView ws, float maxChi2) const { | ||
splitVertices(acc, data, ws, maxChi2); | ||
} | ||
}; | ||
|
||
|
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
These should no longer be needed, because the SoA accessors include the `__restrict__` qualifier.