Skip to content
This repository has been archived by the owner on Mar 21, 2024. It is now read-only.

Commit

Permalink
CUDA reduce:
Browse files Browse the repository at this point in the history
- Fix dispatch for the CUDA backend's `reduce` to use two functions (one with the pragma for disabling exec checks, one with THRUST_RUNTIME_FUNCTION) instead of one. This fixes a regression with device compilation that started in CUDA 9.2
- Fully namespace qualify uses of things in the `thrust::detail` namespace to avoid ambiguities.
Review: Internal GitLab #888
Signed-off-by: Jared Hoberock <[email protected]>
Bug 2096679
Bug 2351990
GitHub #924
git-commit 412c623f939fd676ee619c93f2ca478a6046c611
git-author Bryce Adelstein Lelbach aka wash <[email protected]>
VDVS: http://ausdvs.nvidia.com/Build_Results?virtualId=1000216448&which_page=current_build

Jobs: 2096679-2006
[git-p4: depot-paths = "//sw/gpgpu/thrust/": change = 24706499]
  • Loading branch information
brycelelbach committed Aug 10, 2018
1 parent 2ea3980 commit 17a8f8c
Show file tree
Hide file tree
Showing 24 changed files with 345 additions and 306 deletions.
3 changes: 2 additions & 1 deletion thrust/system/cuda/detail/adjacent_difference.h
Original file line number Diff line number Diff line change
Expand Up @@ -461,7 +461,8 @@ namespace __adjacent_difference {
cuda_cub::throw_on_error(status, "adjacent_difference failed on 1st step");

// Allocate temporary storage.
detail::temporary_array<detail::uint8_t, Derived> tmp(policy, storage_size);
thrust::detail::temporary_array<thrust::detail::uint8_t, Derived>
tmp(policy, storage_size);
void *ptr = static_cast<void*>(tmp.data().get());

status = doit_step(ptr,
Expand Down
3 changes: 2 additions & 1 deletion thrust/system/cuda/detail/binary_search.h
Original file line number Diff line number Diff line change
Expand Up @@ -676,7 +676,8 @@ namespace __binary_search {
cuda_cub::throw_on_error(status, "binary_search: failed on 1st call");

// Allocate temporary storage.
detail::temporary_array<detail::uint8_t, Derived> tmp(policy, storage_size);
thrust::detail::temporary_array<thrust::detail::uint8_t, Derived>
tmp(policy, storage_size);
void *ptr = static_cast<void*>(tmp.data().get());

status = doit_pass(ptr,
Expand Down
7 changes: 4 additions & 3 deletions thrust/system/cuda/detail/copy_if.h
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,7 @@ namespace __copy_if {

enum
{
USE_STENCIL = !detail::is_same<StencilIt, no_stencil_tag>::value,
USE_STENCIL = !thrust::detail::is_same<StencilIt, no_stencil_tag>::value,
BLOCK_THREADS = ptx_plan::BLOCK_THREADS,
ITEMS_PER_THREAD = ptx_plan::ITEMS_PER_THREAD,
ITEMS_PER_TILE = ptx_plan::ITEMS_PER_TILE
Expand Down Expand Up @@ -740,7 +740,8 @@ namespace __copy_if {
cuda_cub::throw_on_error(status, "copy_if failed on 1st alias_storage");

// Allocate temporary storage.
detail::temporary_array<detail::uint8_t, Derived> tmp(policy, storage_size);
thrust::detail::temporary_array<thrust::detail::uint8_t, Derived>
tmp(policy, storage_size);
void *ptr = static_cast<void*>(tmp.data().get());

status = core::alias_storage(ptr,
Expand All @@ -750,7 +751,7 @@ namespace __copy_if {
cuda_cub::throw_on_error(status, "copy_if failed on 2nd alias_storage");

size_type* d_num_selected_out
= detail::aligned_reinterpret_cast<size_type*>(allocations[0]);
= thrust::detail::aligned_reinterpret_cast<size_type*>(allocations[0]);

status = doit_step(allocations[1],
temp_storage_bytes,
Expand Down
64 changes: 32 additions & 32 deletions thrust/system/cuda/detail/core/agent_launcher.h

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions thrust/system/cuda/detail/core/alignment.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
* limitations under the License.
*/

// TODO: This can probably be removed.

#pragma once

#include <thrust/system/cuda/detail/util.h>
Expand Down
30 changes: 15 additions & 15 deletions thrust/system/cuda/detail/core/util.h
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ namespace core {
// otherwise move on to the next sm in the sm_list
template <template <class> class P, class SM, class _1, class _2, class _3, class _4, class _5, class _6, class _7, class _8, class _9>
struct specialize_plan_impl_match<P, typelist<SM, _1, _2, _3, _4, _5, _6, _7, _8, _9> >
: detail::conditional<
: thrust::detail::conditional<
has_sm_tuning<P, SM>::value,
P<SM>,
specialize_plan_impl_match<P, typelist<_1, _2, _3, _4, _5, _6, _7, _8, _9> > >::type {};
Expand All @@ -148,7 +148,7 @@ namespace core {
// if Plan has tuning type, this means it has SM-specific tuning
// so loop through sm_list to find match,
// otherwise just specialize on provided SM
typedef detail::conditional<has_tuning_t<Plan<lowest_supported_sm_arch> >::value,
typedef thrust::detail::conditional<has_tuning_t<Plan<lowest_supported_sm_arch> >::value,
specialize_plan_impl_loop<Plan, SM, sm_list>,
Plan<SM> >
type;
Expand All @@ -173,7 +173,7 @@ namespace core {
struct temp_storage_size_impl;

template <class Agent>
struct temp_storage_size_impl<Agent, detail::false_type>
struct temp_storage_size_impl<Agent, thrust::detail::false_type>
{
enum
{
Expand All @@ -182,7 +182,7 @@ namespace core {
};

template <class Agent>
struct temp_storage_size_impl<Agent, detail::true_type>
struct temp_storage_size_impl<Agent, thrust::detail::true_type>
{
enum
{
Expand Down Expand Up @@ -223,9 +223,9 @@ namespace core {
{
value = V
};
typedef typename detail::conditional<value,
detail::true_type,
detail::false_type>::type type;
typedef typename thrust::detail::conditional<value,
thrust::detail::true_type,
thrust::detail::false_type>::type type;
};

template <class Agent, size_t MAX_SHMEM>
Expand Down Expand Up @@ -275,7 +275,7 @@ namespace core {
template <class PtxPlan>
THRUST_RUNTIME_FUNCTION
AgentPlan(PtxPlan,
typename detail::disable_if_convertible<
typename thrust::detail::disable_if_convertible<
PtxPlan,
AgentPlan>::type* = NULL)
: block_threads(PtxPlan::BLOCK_THREADS),
Expand All @@ -297,10 +297,10 @@ namespace core {
};

template <class Agent>
struct get_plan : detail::conditional<
struct get_plan : thrust::detail::conditional<
has_Plan<Agent>::value,
return_Plan<Agent>,
detail::identity_<AgentPlan> >::type
thrust::detail::identity_<AgentPlan> >::type
{
};

Expand Down Expand Up @@ -602,8 +602,8 @@ namespace core {
typedef typename iterator_traits<It>::value_type value_type;
typedef typename iterator_traits<It>::difference_type size_type;

typedef typename detail::conditional<
detail::is_trivial_iterator<It>::value,
typedef typename thrust::detail::conditional<
thrust::detail::is_trivial_iterator<It>::value,
cub::CacheModifiedInputIterator<PtxPlan::LOAD_MODIFIER,
value_type,
size_type>,
Expand All @@ -612,14 +612,14 @@ namespace core {

template <class PtxPlan, class It>
typename LoadIterator<PtxPlan, It>::type __device__ __forceinline__
make_load_iterator_impl(It it, detail::true_type /* is_trivial */)
make_load_iterator_impl(It it, thrust::detail::true_type /* is_trivial */)
{
return raw_pointer_cast(&*it);
}

template <class PtxPlan, class It>
typename LoadIterator<PtxPlan, It>::type __device__ __forceinline__
make_load_iterator_impl(It it, detail::false_type /* is_trivial */)
make_load_iterator_impl(It it, thrust::detail::false_type /* is_trivial */)
{
return it;
}
Expand All @@ -629,7 +629,7 @@ namespace core {
make_load_iterator(PtxPlan const&, It it)
{
return make_load_iterator_impl<PtxPlan>(
it, typename detail::is_trivial_iterator<It>::type());
it, typename thrust::detail::is_trivial_iterator<It>::type());
}

template<class>
Expand Down
2 changes: 1 addition & 1 deletion thrust/system/cuda/detail/count.h
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ count(execution_policy<Derived> &policy,
return cuda_cub::count_if(policy,
first,
last,
detail::equal_to_value<Value>(value));
thrust::detail::equal_to_value<Value>(value));
}

} // namespace cuda_cub
Expand Down
2 changes: 1 addition & 1 deletion thrust/system/cuda/detail/error.inl
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ class cuda_error_category

const error_category &cuda_category(void)
{
static const cuda_cub::detail::cuda_error_category result;
static const thrust::system::cuda_cub::detail::cuda_error_category result;
return result;
}

Expand Down
5 changes: 3 additions & 2 deletions thrust/system/cuda/detail/extrema.h
Original file line number Diff line number Diff line change
Expand Up @@ -342,7 +342,8 @@ namespace __extrema {
cuda_cub::throw_on_error(status, "extrema failed on 1st alias storage");

// Allocate temporary storage.
detail::temporary_array<detail::uint8_t, Derived> tmp(policy, storage_size);
thrust::detail::temporary_array<thrust::detail::uint8_t, Derived>
tmp(policy, storage_size);
void *ptr = static_cast<void*>(tmp.data().get());

status = core::alias_storage(ptr,
Expand All @@ -351,7 +352,7 @@ namespace __extrema {
allocation_sizes);
cuda_cub::throw_on_error(status, "extrema failed on 2nd alias storage");

T* d_result = detail::aligned_reinterpret_cast<T*>(allocations[0]);
T* d_result = thrust::detail::aligned_reinterpret_cast<T*>(allocations[0]);

status = doit_step<T>(allocations[1],
temp_storage_bytes,
Expand Down
4 changes: 2 additions & 2 deletions thrust/system/cuda/detail/find.h
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,7 @@ find_if_not(execution_policy<Derived>& policy,
InputIt last,
Predicate predicate)
{
return cuda_cub::find_if(policy, first, last, detail::not1(predicate));
return cuda_cub::find_if(policy, first, last, thrust::detail::not1(predicate));
}


Expand All @@ -206,7 +206,7 @@ find(execution_policy<Derived> &policy,
return cuda_cub::find_if(policy,
first,
last,
detail::equal_to_value<T>(value));
thrust::detail::equal_to_value<T>(value));
}


Expand Down
2 changes: 1 addition & 1 deletion thrust/system/cuda/detail/for_each.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ namespace cuda_cub {
Size count,
UnaryOp op)
{
typedef detail::wrapped_function<UnaryOp, void> wrapped_t;
typedef thrust::detail::wrapped_function<UnaryOp, void> wrapped_t;
wrapped_t wrapped_op(op);

cuda_cub::parallel_for(policy,
Expand Down
47 changes: 24 additions & 23 deletions thrust/system/cuda/detail/merge.h
Original file line number Diff line number Diff line change
Expand Up @@ -288,7 +288,7 @@ namespace __merge {


template<size_t VALUE>
struct integer_constant : detail::integral_constant<size_t, VALUE> {};
struct integer_constant : thrust::detail::integral_constant<size_t, VALUE> {};

template <class KeysIt1,
class KeysIt2,
Expand All @@ -309,7 +309,7 @@ namespace __merge {
typedef key1_type key_type;
typedef item1_type item_type;

typedef typename detail::conditional<
typedef typename thrust::detail::conditional<
MERGE_ITEMS::value,
integer_constant<sizeof(key_type) + sizeof(item_type)>,
integer_constant<sizeof(key_type)> >::type tuning_type;
Expand Down Expand Up @@ -828,7 +828,8 @@ namespace __merge {
cuda_cub::throw_on_error(status, "merge: failed on 1st step");

// Allocate temporary storage.
detail::temporary_array<detail::uint8_t, Derived> tmp(policy, storage_size);
thrust::detail::temporary_array<thrust::detail::uint8_t, Derived>
tmp(policy, storage_size);
void *ptr = static_cast<void*>(tmp.data().get());

status = doit_step<MERGE_ITEMS>(ptr,
Expand Down Expand Up @@ -882,16 +883,16 @@ merge(execution_policy<Derived>& policy,
//
keys_type* null_ = NULL;
//
ret = __merge::merge<detail::false_type>(policy,
keys1_first,
keys1_last,
keys2_first,
keys2_last,
null_,
null_,
result,
null_,
compare_op)
ret = __merge::merge<thrust::detail::false_type>(policy,
keys1_first,
keys1_last,
keys2_first,
keys2_last,
null_,
null_,
result,
null_,
compare_op)
.first;
}
else
Expand Down Expand Up @@ -952,16 +953,16 @@ merge_by_key(execution_policy<Derived> &policy,
pair<KeysOutputIt, ItemsOutputIt> ret = thrust::make_pair(keys_result, items_result);
if (__THRUST_HAS_CUDART__)
{
return __merge::merge<detail::true_type>(policy,
keys1_first,
keys1_last,
keys2_first,
keys2_last,
items1_first,
items2_first,
keys_result,
items_result,
compare_op);
return __merge::merge<thrust::detail::true_type>(policy,
keys1_first,
keys1_last,
keys2_first,
keys2_last,
items1_first,
items2_first,
keys_result,
items_result,
compare_op);
}
else
{
Expand Down
11 changes: 6 additions & 5 deletions thrust/system/cuda/detail/partition.h
Original file line number Diff line number Diff line change
Expand Up @@ -191,8 +191,8 @@ namespace __partition {

enum
{
SINGLE_OUTPUT = detail::is_same<RejectedOutIt, single_output_tag>::value,
USE_STENCIL = !detail::is_same<StencilIt, no_stencil_tag>::value,
SINGLE_OUTPUT = thrust::detail::is_same<RejectedOutIt, single_output_tag>::value,
USE_STENCIL = !thrust::detail::is_same<StencilIt, no_stencil_tag>::value,
BLOCK_THREADS = ptx_plan::BLOCK_THREADS,
ITEMS_PER_THREAD = ptx_plan::ITEMS_PER_THREAD,
ITEMS_PER_TILE = ptx_plan::ITEMS_PER_TILE
Expand Down Expand Up @@ -750,7 +750,8 @@ namespace __partition {
cuda_cub::throw_on_error(status, "partition failed on 1st alias_storage");

// Allocate temporary storage.
detail::temporary_array<detail::uint8_t, Derived> tmp(policy, storage_size);
thrust::detail::temporary_array<thrust::detail::uint8_t, Derived>
tmp(policy, storage_size);
void *ptr = static_cast<void*>(tmp.data().get());

status = core::alias_storage(ptr,
Expand All @@ -760,7 +761,7 @@ namespace __partition {
cuda_cub::throw_on_error(status, "partition failed on 2nd alias_storage");

size_type* d_num_selected_out
= detail::aligned_reinterpret_cast<size_type*>(allocations[0]);
= thrust::detail::aligned_reinterpret_cast<size_type*>(allocations[0]);

status = doit_step(allocations[1],
temp_storage_bytes,
Expand Down Expand Up @@ -805,7 +806,7 @@ namespace __partition {
size_type num_items = thrust::distance(first, last);

// Allocate temporary storage.
detail::temporary_array<value_type, Derived> tmp(policy, num_items);
thrust::detail::temporary_array<value_type, Derived> tmp(policy, num_items);

cuda_cub::uninitialized_copy(policy, first, last, tmp.begin());

Expand Down
Loading

0 comments on commit 17a8f8c

Please sign in to comment.