Skip to content

Commit

Permalink
Merge pull request #20 from cerati/mt-devel
Browse files (browse the repository at this point in the history)
Fix standard track building.
  • Loading branch information
cerati committed Jan 12, 2016
2 parents c5f8c4c + 9378b30 commit 1bc0bef
Show file tree
Hide file tree
Showing 3 changed files with 46 additions and 34 deletions.
36 changes: 17 additions & 19 deletions mkFit/MkBuilder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -504,8 +504,9 @@ void MkBuilder::FindTracks()
std::cout << "processing lay=" << ilay+1 << std::endl;
#endif

//prepare unrolled vector to loop over
// prepare unrolled vector to loop over
std::vector<std::pair<int,int> > seed_cand_idx;

for (int iseed = otd.th_start_seed; iseed != otd.th_end_seed; ++iseed)
{
for (int ic = 0; ic < etabin_of_comb_candidates.m_candidates[iseed].size(); ++ic)
Expand Down Expand Up @@ -546,7 +547,20 @@ void MkBuilder::FindTracks()
//fixme find a way to deal only with the candidates needed in this thread
mkfp->InputTracksAndHitIdx(etabin_of_comb_candidates.m_candidates,
seed_cand_idx, itrack, end,
true);
ilay == Config::nlayers_per_seed);

//propagate to layer
if (ilay > Config::nlayers_per_seed)
{
#ifdef DEBUG
std::cout << "propagate to lay=" << ilay+1 << " start from x=" << mkfp->getPar(0, 0, 0) << " y=" << mkfp->getPar(0, 0, 1) << " z=" << mkfp->getPar(0, 0, 2)<< " r=" << getHypot(mkfp->getPar(0, 0, 0), mkfp->getPar(0, 0, 1))
<< " px=" << mkfp->getPar(0, 0, 3) << " py=" << mkfp->getPar(0, 0, 4) << " pz=" << mkfp->getPar(0, 0, 5) << " pT=" << getHypot(mkfp->getPar(0, 0, 3), mkfp->getPar(0, 0, 4)) << std::endl;
#endif
mkfp->PropagateTracksToR(m_event->geom_.Radius(ilay), end - itrack);
#ifdef DEBUG
std::cout << "propagate to lay=" << ilay+1 << " arrive at x=" << mkfp->getPar(0, 1, 0) << " y=" << mkfp->getPar(0, 1, 1) << " z=" << mkfp->getPar(0, 1, 2)<< " r=" << getHypot(mkfp->getPar(0, 1, 0), mkfp->getPar(0, 1, 1)) << std::endl;
#endif
}

#ifdef DEBUG
std::cout << "now get hit range" << std::endl;
Expand All @@ -562,23 +576,7 @@ void MkBuilder::FindTracks()
std::cout << "make new candidates" << std::endl;
#endif

mkfp->FindCandidates(bunch_of_hits, tmp_candidates, otd.th_start_seed);

//propagate to layer
// This is sort of a silly fix as no-clone-engine code produces
// zero good tracks with propagate-at-the-end.
// But at least it doesn't crash with uncaught exception :)
if (ilay + 1 < Config::nLayers)
{
#ifdef DEBUG
std::cout << "propagate to lay=" << ilay+2 << " start from x=" << mkfp->getPar(0, 0, 0) << " y=" << mkfp->getPar(0, 0, 1) << " z=" << mkfp->getPar(0, 0, 2)<< " r=" << getHypot(mkfp->getPar(0, 0, 0), mkfp->getPar(0, 0, 1))
<< " px=" << mkfp->getPar(0, 0, 3) << " py=" << mkfp->getPar(0, 0, 4) << " pz=" << mkfp->getPar(0, 0, 5) << " pT=" << getHypot(mkfp->getPar(0, 0, 3), mkfp->getPar(0, 0, 4)) << std::endl;
#endif
mkfp->PropagateTracksToR(m_event->geom_.Radius(ilay+1), end - itrack);
#ifdef DEBUG
std::cout << "propagate to lay=" << ilay+2 << " arrive at x=" << mkfp->getPar(0, 1, 0) << " y=" << mkfp->getPar(0, 1, 1) << " z=" << mkfp->getPar(0, 1, 2)<< " r=" << getHypot(mkfp->getPar(0, 1, 0), mkfp->getPar(0, 1, 1)) << std::endl;
#endif
}
mkfp->FindCandidates(bunch_of_hits, tmp_candidates, otd.th_start_seed, end - itrack);

} //end of vectorized loop

Expand Down
41 changes: 27 additions & 14 deletions mkFit/MkFitter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,6 @@ void MkFitter::InputTracksAndHitIdx(std::vector<std::vector<Track> >& tracks,
int itrack = 0;
for (int i = beg; i < end; ++i, ++itrack)
{

Track &trk = tracks[idxs[i].first][idxs[i].second];

Label(itrack, 0, 0) = trk.label();
Expand All @@ -119,7 +118,7 @@ void MkFitter::InputTracksAndHitIdx(std::vector<std::vector<Track> >& tracks,
Err[iI].CopyIn(itrack, trk.errors().Array());
Par[iI].CopyIn(itrack, trk.parameters().Array());

Chg(itrack, 0, 0) = trk.charge();
Chg (itrack, 0, 0) = trk.charge();
Chi2(itrack, 0, 0) = trk.chi2();

for (int hi = 0; hi < Nhits; ++hi)
Expand Down Expand Up @@ -914,7 +913,9 @@ void MkFitter::AddBestHit(BunchOfHits &bunch_of_hits)



void MkFitter::FindCandidates(BunchOfHits &bunch_of_hits, std::vector<std::vector<Track> >& tmp_candidates, int offset)
void MkFitter::FindCandidates(BunchOfHits &bunch_of_hits,
std::vector<std::vector<Track> >& tmp_candidates,
const int offset, const int N_proc)
{

const char *varr = (char*) bunch_of_hits.m_hits;
Expand All @@ -923,25 +924,37 @@ void MkFitter::FindCandidates(BunchOfHits &bunch_of_hits, std::vector<std::vecto
const int off_param = (char*) bunch_of_hits.m_hits[0].posArray() - varr;

int idx[NN] __attribute__((aligned(64)));
int idx_chew[NN] __attribute__((aligned(64)));
// int idx_chew[NN] __attribute__((aligned(64)));

int maxSize = -1;

// Determine maximum number of hits for tracks in the collection.
// At the same time prefetch the first set of hits to L1 and the second one to L2.
for (int it = 0; it < NN; ++it)
for (int it = 0; it < N_proc; ++it)
{
int off = XHitPos.At(it, 0, 0) * sizeof(Hit);

_mm_prefetch(varr + off, _MM_HINT_T0);
_mm_prefetch(varr + sizeof(Hit) + off, _MM_HINT_T1);

idx[it] = off;
idx_chew[it] = it*sizeof(Hit);
// idx_chew[it] = it*sizeof(Hit);

// XXX There is an intrinsic for that, out of loop.
maxSize = std::max(maxSize, XHitSize.At(it, 0, 0));
}
// XXXX MT FIXME: Use the limit for:
// - SlurpIns, use masked gather for MIC_INTRINSICS
// - prefetching loops - DONE
// - computeChi2MPlex() -- really hard ... it calls Matriplex functions. This
// should be fine. - DOES NOT NEED TO BE DONE
// - hit (valid or invalid) registration loops - DONE
// The following loop is not needed then. But I do need a mask for intrinsics slurpin.
for (int it = N_proc; it < NN; ++it)
{
idx[it] = idx[0];
// idx_chew[it] = idx_chew[0];
}

// XXXX MT Uber hack to avoid tracks with like 300 hits to process.
maxSize = std::min(maxSize, Config::maxHitsConsidered);
Expand All @@ -950,7 +963,7 @@ void MkFitter::FindCandidates(BunchOfHits &bunch_of_hits, std::vector<std::vecto
//__m512i vi = _mm512_setr_epi32(idx[ 0], idx[ 1], idx[ 2], idx[ 3], idx[ 4], idx[ 5], idx[ 6], idx[ 7],
// idx[ 8], idx[ 9], idx[10], idx[11], idx[12], idx[13], idx[14], idx[15]);
__m512i vi = _mm512_load_epi32(idx);
__m512i vi_chew = _mm512_load_epi32(idx_chew);
// __m512i vi_chew = _mm512_load_epi32(idx_chew);
#endif

// Has basically no effect, it seems.
Expand All @@ -961,7 +974,7 @@ void MkFitter::FindCandidates(BunchOfHits &bunch_of_hits, std::vector<std::vecto

// Prefetch to L2 the hits we'll process after two loops iterations.
// Ideally this would be initiated before coming here, for whole bunch_of_hits.m_hits vector.
for (int itrack = 0; itrack < NN; ++itrack)
for (int itrack = 0; itrack < N_proc; ++itrack)
{
_mm_prefetch(varr + 2*sizeof(Hit) + idx[itrack], _MM_HINT_T1);
}
Expand Down Expand Up @@ -992,7 +1005,7 @@ void MkFitter::FindCandidates(BunchOfHits &bunch_of_hits, std::vector<std::vecto
computeChi2MPlex(Err[iP], Par[iP], Chg, msErr[Nhits], msPar[Nhits], outChi2);

// Prefetch to L1 the hits we'll process in the next loop iteration.
for (int itrack = 0; itrack < NN; ++itrack)
for (int itrack = 0; itrack < N_proc; ++itrack)
{
_mm_prefetch(varr + sizeof(Hit) + idx[itrack], _MM_HINT_T0);
}
Expand All @@ -1001,7 +1014,7 @@ void MkFitter::FindCandidates(BunchOfHits &bunch_of_hits, std::vector<std::vecto
//this is not needed for candidates the hit is not added to, but it's vectorized so doing it serially below should take the same time
//still it's a waste of time in case the hit is not added to any of the candidates, so check beforehand that at least one cand needs update
bool oneCandPassCut = false;
for (int itrack = 0; itrack < NN;++itrack)
for (int itrack = 0; itrack < N_proc;++itrack)
{
float chi2 = fabs(outChi2[itrack]);//fixme negative chi2 sometimes...
#ifdef DEBUG
Expand All @@ -1026,7 +1039,7 @@ void MkFitter::FindCandidates(BunchOfHits &bunch_of_hits, std::vector<std::vecto

//create candidate with hit in case chi2<Config::chi2Cut
//fixme: please vectorize me... (not sure it's possible in this case)
for (int itrack = 0; itrack < NN; ++itrack)
for (int itrack = 0; itrack < N_proc; ++itrack)
{
float chi2 = fabs(outChi2[itrack]);//fixme negative chi2 sometimes...
#ifdef DEBUG
Expand All @@ -1051,11 +1064,11 @@ void MkFitter::FindCandidates(BunchOfHits &bunch_of_hits, std::vector<std::vecto
//set the track state to the updated parameters
Err[iC].CopyOut(itrack, newcand.errors_nc().Array());
Par[iC].CopyOut(itrack, newcand.parameters_nc().Array());

#ifdef DEBUG
std::cout << "updated track parameters x=" << newcand.parameters()[0] << " y=" << newcand.parameters()[1] << std::endl;
#endif

tmp_candidates[SeedIdx(itrack, 0, 0)-offset].push_back(newcand);
}
}
Expand All @@ -1065,7 +1078,7 @@ void MkFitter::FindCandidates(BunchOfHits &bunch_of_hits, std::vector<std::vecto

//now add invalid hit
//fixme: please vectorize me...
for (int itrack = 0; itrack < NN;++itrack)
for (int itrack = 0; itrack < N_proc; ++itrack)
{
if (countInvalidHits(itrack)>0) continue;//check this is ok for vectorization //fixme not optimal
Track newcand;
Expand Down
3 changes: 2 additions & 1 deletion mkFit/MkFitter.h
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,8 @@ class MkFitter
void SelectHitRanges(BunchOfHits &bunch_of_hits, const int N_proc);
void AddBestHit (BunchOfHits &bunch_of_hits);

void FindCandidates(BunchOfHits &bunch_of_hits, std::vector<std::vector<Track> >& tmp_candidates, int offset);
void FindCandidates(BunchOfHits &bunch_of_hits, std::vector<std::vector<Track> >& tmp_candidates,
const int offset, const int N_proc);

// ================================================================
// Methods to be used with clone engine
Expand Down

0 comments on commit 1bc0bef

Please sign in to comment.