-
Notifications
You must be signed in to change notification settings - Fork 35
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add AlpakaCore/HistoContainer.h + HistoContainer_t, OneHistoContainer_t and OneToManyAssoc_t tests #165
Add AlpakaCore/HistoContainer.h + HistoContainer_t, OneHistoContainer_t and OneToManyAssoc_t tests #165
Changes from 1 commit
faaede0
adb66eb
f548e27
c4878d5
d8c9ad6
5bacd9c
46805ad
5b7b5ab
0cd5c71
e8c66b6
b465663
c6e30b8
995af06
82ffd14
c6477b6
94f94e6
2ff31f9
8e9e776
8aaa1f3
b6e9adc
94105db
c08d205
88545ea
ba641ee
5c9a8b9
94176e9
cbb609f
d759b1f
c1824b8
52ccf49
788556d
548b4b9
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -12,8 +12,6 @@ | |||||
#include "AlpakaCore/alpakastdAlgorithm.h" | ||||||
#include "AlpakaCore/prefixScan.h" | ||||||
|
||||||
using namespace ALPAKA_ACCELERATOR_NAMESPACE; | ||||||
|
||||||
namespace cms { | ||||||
namespace alpakatools { | ||||||
|
||||||
|
@@ -91,9 +89,10 @@ namespace cms { | |||||
}; | ||||||
|
||||||
template <typename Histo> | ||||||
ALPAKA_FN_HOST ALPAKA_FN_INLINE __attribute__((always_inline)) void launchFinalize(Histo *__restrict__ h, | ||||||
const DevAcc1 &device, | ||||||
Queue &queue) { | ||||||
ALPAKA_FN_HOST ALPAKA_FN_INLINE __attribute__((always_inline)) void launchFinalize( | ||||||
Histo *__restrict__ h, | ||||||
const ALPAKA_ACCELERATOR_NAMESPACE::DevAcc1 &device, | ||||||
ALPAKA_ACCELERATOR_NAMESPACE::Queue &queue) { | ||||||
uint32_t *poff = (uint32_t *)((char *)(h) + offsetof(Histo, off)); | ||||||
|
||||||
const int num_items = Histo::totbins(); | ||||||
|
@@ -108,19 +107,19 @@ namespace cms { | |||||
|
||||||
alpaka::queue::enqueue( | ||||||
queue, | ||||||
alpaka::kernel::createTaskKernel<Acc1>( | ||||||
alpaka::kernel::createTaskKernel<ALPAKA_ACCELERATOR_NAMESPACE::Acc1>( | ||||||
WorkDiv1{Vec1::all(1u), Vec1::all(1u), Vec1::all(1u)}, storePrefixScanWorkingSpace(), h, nblocks)); | ||||||
|
||||||
const WorkDiv1 &workDiv = cms::alpakatools::make_workdiv(blocksPerGrid, threadsPerBlockOrElementsPerThread); | ||||||
alpaka::queue::enqueue(queue, | ||||||
alpaka::kernel::createTaskKernel<Acc1>( | ||||||
alpaka::kernel::createTaskKernel<ALPAKA_ACCELERATOR_NAMESPACE::Acc1>( | ||||||
workDiv, multiBlockPrefixScanFirstStep<uint32_t>(), poff, poff, psum_d, num_items)); | ||||||
|
||||||
const WorkDiv1 &workDivWith1Block = | ||||||
cms::alpakatools::make_workdiv(Vec1::all(1), threadsPerBlockOrElementsPerThread); | ||||||
alpaka::queue::enqueue( | ||||||
queue, | ||||||
alpaka::kernel::createTaskKernel<Acc1>( | ||||||
alpaka::kernel::createTaskKernel<ALPAKA_ACCELERATOR_NAMESPACE::Acc1>( | ||||||
workDivWith1Block, multiBlockPrefixScanSecondStep<uint32_t>(), poff, poff, psum_d, num_items, nblocks)); | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. (This comment is not really in the scope of this PR.) I'm curious why the
I'm mostly wondering whether this split was a design choice, or whether there were any constraints that prevented the CUDA-style implementation. Let me tag @felicepantaleo in case he remembers. |
||||||
} | ||||||
|
||||||
|
@@ -132,21 +131,24 @@ namespace cms { | |||||
uint32_t const *__restrict__ offsets, | ||||||
uint32_t totSize, | ||||||
unsigned int nthreads, | ||||||
const DevAcc1 &device, | ||||||
Queue &queue) { | ||||||
const ALPAKA_ACCELERATOR_NAMESPACE::DevAcc1 &device, | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
ALPAKA_ACCELERATOR_NAMESPACE::Queue &queue) { | ||||||
const unsigned int nblocks = (totSize + nthreads - 1) / nthreads; | ||||||
const Vec1 blocksPerGrid(nblocks); | ||||||
const Vec1 threadsPerBlockOrElementsPerThread(nthreads); | ||||||
const WorkDiv1 &workDiv = cms::alpakatools::make_workdiv(blocksPerGrid, threadsPerBlockOrElementsPerThread); | ||||||
|
||||||
alpaka::queue::enqueue(queue, alpaka::kernel::createTaskKernel<Acc1>(workDiv, launchZero(), h)); | ||||||
alpaka::queue::enqueue( | ||||||
queue, alpaka::kernel::createTaskKernel<ALPAKA_ACCELERATOR_NAMESPACE::Acc1>(workDiv, launchZero(), h)); | ||||||
|
||||||
alpaka::queue::enqueue(queue, | ||||||
alpaka::kernel::createTaskKernel<Acc1>(workDiv, countFromVector(), h, nh, v, offsets)); | ||||||
alpaka::kernel::createTaskKernel<ALPAKA_ACCELERATOR_NAMESPACE::Acc1>( | ||||||
workDiv, countFromVector(), h, nh, v, offsets)); | ||||||
launchFinalize(h, device, queue); | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Following removal of
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Ah yes, true: this argument is no longer necessary, thanks. I just removed it. |
||||||
|
||||||
alpaka::queue::enqueue(queue, | ||||||
alpaka::kernel::createTaskKernel<Acc1>(workDiv, fillFromVector(), h, nh, v, offsets)); | ||||||
alpaka::kernel::createTaskKernel<ALPAKA_ACCELERATOR_NAMESPACE::Acc1>( | ||||||
workDiv, fillFromVector(), h, nh, v, offsets)); | ||||||
} | ||||||
|
||||||
struct finalizeBulk { | ||||||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,10 +8,10 @@ | |
#include "AlpakaCore/alpakaWorkDivHelper.h" | ||
#include "AlpakaCore/HistoContainer.h" | ||
|
||
using namespace ALPAKA_ACCELERATOR_NAMESPACE; | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Sorry, but actually here (and in other unit test files) the There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes, but at this stage I found it more consistent to remove it everywhere, rather than having a mix of 'using namespace' and ALPAKA_ACCELERATOR_NAMESPACE:: |
||
template <typename T> | ||
void go(const DevHost& host, const DevAcc1& device, Queue& queue) { | ||
void go(const DevHost& host, | ||
const ALPAKA_ACCELERATOR_NAMESPACE::DevAcc1& device, | ||
ALPAKA_ACCELERATOR_NAMESPACE::Queue& queue) { | ||
std::mt19937 eng; | ||
std::uniform_int_distribution<T> rgen(std::numeric_limits<T>::min(), std::numeric_limits<T>::max()); | ||
|
||
|
@@ -162,8 +162,9 @@ void go(const DevHost& host, const DevAcc1& device, Queue& queue) { | |
|
||
int main() { | ||
const DevHost host(alpaka::pltf::getDevByIdx<PltfHost>(0u)); | ||
const DevAcc1 device(alpaka::pltf::getDevByIdx<PltfAcc1>(0u)); | ||
Queue queue(device); | ||
const ALPAKA_ACCELERATOR_NAMESPACE::DevAcc1 device( | ||
alpaka::pltf::getDevByIdx<ALPAKA_ACCELERATOR_NAMESPACE::PltfAcc1>(0u)); | ||
ALPAKA_ACCELERATOR_NAMESPACE::Queue queue(device); | ||
|
||
go<int16_t>(host, device, queue); | ||
go<int8_t>(host, device, queue); | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
After removing the memory allocation, the `device` parameter is not needed anymore.