From 4d4fae14a6e37442042e93679b9d5970948b67bd Mon Sep 17 00:00:00 2001 From: Allison Vacanti Date: Mon, 4 May 2020 16:57:06 -0400 Subject: [PATCH] Fix issues so all host/device combinations build and pass tests. - Add more metadata to mock specializations for testing iterator in testing/copy.cu. - Add missing include to shuffle unit test. - Specialize wrapped_function for void return types. - MSVC is not a fan of the pattern `return static_cast(expr);`. - Replace deprecated `tbb/tbb_thread.h` with ``. - Fix overcounting of initial value in tbb scans. - Apparently reverse_join may be called before operator() - Use `thrust::advance` instead of `+=` for generic iterators. - Wrap the OMP flags in -Xcompiler for NVCC - Extend ASSERT_STATIC_ASSERT skip for HOST=OMP, too - Add missing header caught by tbb.cuda configs. - Fix 'unsafe API' warnings in examples on MSVC: s/fopen/fstream/ --- CMakeLists.txt | 10 ++ examples/discrete_voronoi.cu | 30 ++--- testing/copy.cu | 11 ++ testing/shuffle.cu | 1 + testing/unittest_static_assert.cu | 2 +- thrust/cmake/thrust-config.cmake | 11 ++ thrust/detail/function.h | 129 +++++++++++++++------ thrust/detail/internal_functional.h | 1 + thrust/iterator/detail/zip_iterator_base.h | 5 +- thrust/system/tbb/detail/reduce_by_key.inl | 5 +- thrust/system/tbb/detail/scan.inl | 48 +++----- thrust/system/tbb/detail/sort.inl | 1 + 12 files changed, 167 insertions(+), 87 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 5b48717cd7..a4f1cf0985 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -236,6 +236,9 @@ if ("MSVC" STREQUAL "${CMAKE_CXX_COMPILER_ID}") # object files: append_option_if_available("/bigobj" THRUST_CXX_WARNINGS) + # "Oh right, this is Visual Studio." + add_compile_definitions("NOMINMAX") + set(THRUST_TREAT_FILE_AS_CXX "/TP") else () append_option_if_available("-Werror" THRUST_CXX_WARNINGS) @@ -679,6 +682,13 @@ foreach (THRUST_EXAMPLE_SOURCE IN LISTS THRUST_EXAMPLES) endif () endif () + if ("MSVC" STREQUAL "${CMAKE_CXX_COMPILER_ID}") + # Some examples use unsafe APIs (e.g. fopen) that MSVC will complain about + # unless this is set: + set_target_properties(${THRUST_EXAMPLE} + PROPERTIES COMPILE_DEFINITIONS "_CRT_SECURE_NO_WARNINGS") + endif() + add_test(NAME ${THRUST_EXAMPLE} COMMAND ${CMAKE_COMMAND} -DTHRUST_EXAMPLE=${THRUST_EXAMPLE} diff --git a/examples/discrete_voronoi.cu b/examples/discrete_voronoi.cu index 93e7e5622b..949d409f29 100644 --- a/examples/discrete_voronoi.cu +++ b/examples/discrete_voronoi.cu @@ -4,10 +4,10 @@ #include #include #include -#include +#include #include -#include +#include #include #include "include/timer.h" @@ -135,21 +135,25 @@ void generate_random_sites(thrust::host_vector &t, int Nb, int m, int n) //Export the tab to PGM image format void vector_to_pgm(thrust::host_vector &t, int m, int n, const char *out) { - FILE *f; + assert(t.size() == m * n && "Vector size does not match image dims."); - f=fopen(out,"w+t"); - fprintf(f,"P2\n"); - fprintf(f,"%d %d\n 253\n",m,n); + std::fstream f(out, std::fstream::out); + f << "P2\n"; + f << m << " " << n << "\n"; + f << "253\n"; + + //Hash function to map values to [0,255] + auto to_grey_level = [&f](int in_value) -> int + { + return (71 * in_value) % 253; + }; - for(int j = 0; j < n ; j++) + for (int value : t) { - for(int i = 0; i < m ; i++) - { - fprintf(f,"%d ",(int)(71*t[j*m+i])%253); //Hash function to map values to [0,255] - } + f << to_grey_level(value) << " "; } - fprintf(f,"\n"); - fclose(f); + f << "\n"; + f.close(); } /************Main Jfa loop********************/ diff --git a/testing/copy.cu b/testing/copy.cu index 17c46292c0..64165c8e7a 100644 --- a/testing/copy.cu +++ b/testing/copy.cu @@ -724,6 +724,8 @@ struct only_set_when_expected_it __host__ __device__ only_set_when_expected_it operator*() const { return *this; } template __host__ __device__ only_set_when_expected_it operator+(Difference) const { return *this; } + template + __host__ __device__ only_set_when_expected_it operator+=(Difference) const { return *this; } template __host__ __device__ only_set_when_expected_it operator[](Index) const { return *this; } @@ -739,11 +741,20 @@ struct only_set_when_expected_it namespace thrust { +namespace detail +{ +// We need this type to pass as a non-const ref for unary_transform_functor +// to compile: +template <> +struct is_non_const_reference : thrust::true_type {}; +} + template<> struct iterator_traits { typedef long long value_type; typedef only_set_when_expected_it reference; + typedef thrust::random_access_device_iterator_tag iterator_category; }; } diff --git a/testing/shuffle.cu b/testing/shuffle.cu index 8c95720712..2d9094b421 100644 --- a/testing/shuffle.cu +++ b/testing/shuffle.cu @@ -2,6 +2,7 @@ #if THRUST_CPP_DIALECT >= 2011 #include +#include #include #include #include diff --git a/testing/unittest_static_assert.cu b/testing/unittest_static_assert.cu index dd5ed659be..02322f8d69 100644 --- a/testing/unittest_static_assert.cu +++ b/testing/unittest_static_assert.cu @@ -22,7 +22,7 @@ struct static_assertion template void TestStaticAssertAssert() { -#if THRUST_DEVICE_SYSTEM != THRUST_DEVICE_SYSTEM_OMP +#if THRUST_DEVICE_SYSTEM != THRUST_DEVICE_SYSTEM_OMP && THRUST_HOST_SYSTEM != THRUST_HOST_SYSTEM_OMP V test(10); ASSERT_STATIC_ASSERT(thrust::generate(test.begin(), test.end(), static_assertion())); #endif diff --git a/thrust/cmake/thrust-config.cmake b/thrust/cmake/thrust-config.cmake index cedde21d8c..4795a86f36 100644 --- a/thrust/cmake/thrust-config.cmake +++ b/thrust/cmake/thrust-config.cmake @@ -551,6 +551,16 @@ macro(_thrust_find_TBB required) endif() endmacro() +# Wrap the OpenMP flags for CUDA targets +function(thrust_fixup_omp_target omp_target) + get_target_property(opts ${omp_target} INTERFACE_COMPILE_OPTIONS) + if (opts MATCHES "\\$<\\$:([^>]*)>") + target_compile_options(${omp_target} INTERFACE + $<$:-Xcompiler=${CMAKE_MATCH_1}> + ) + endif() +endfunction() + # This must be a macro instead of a function to ensure that backends passed to # find_package(Thrust COMPONENTS [...]) have their full configuration loaded # into the current scope. This provides at least some remedy for CMake issue @@ -568,6 +578,7 @@ macro(_thrust_find_OMP required) ) if (TARGET OpenMP::OpenMP_CXX) + thrust_fixup_omp_target(OpenMP::OpenMP_CXX) thrust_set_OMP_target(OpenMP::OpenMP_CXX) else() thrust_debug("OpenMP::OpenMP_CXX not found!" internal) diff --git a/thrust/detail/function.h b/thrust/detail/function.h index f1f9e9c943..a251c298a2 100644 --- a/thrust/detail/function.h +++ b/thrust/detail/function.h @@ -24,80 +24,137 @@ namespace thrust namespace detail { - -template - struct wrapped_function +template +struct wrapped_function { // mutable because Function::operator() might be const mutable Function m_f; inline __host__ __device__ wrapped_function() - : m_f() + : m_f() {} inline __host__ __device__ - wrapped_function(const Function &f) - : m_f(f) + wrapped_function(const Function& f) + : m_f(f) {} __thrust_exec_check_disable__ - template + template inline __host__ __device__ - Result operator()(Argument &x) const + Result operator()(Argument& x) const { - // we static cast to Result to handle void Result without error - // in case Function's result is non-void return static_cast(m_f(thrust::raw_reference_cast(x))); } __thrust_exec_check_disable__ - template - inline __host__ __device__ Result operator()(const Argument &x) const + template + inline __host__ __device__ + Result operator()(const Argument& x) const { - // we static cast to Result to handle void Result without error - // in case Function's result is non-void return static_cast(m_f(thrust::raw_reference_cast(x))); } __thrust_exec_check_disable__ - template - inline __host__ __device__ Result operator()(Argument1 &x, Argument2 &y) const + template + inline __host__ __device__ + Result operator()(Argument1& x, Argument2& y) const { - // we static cast to Result to handle void Result without error - // in case Function's result is non-void - return static_cast(m_f(thrust::raw_reference_cast(x), thrust::raw_reference_cast(y))); + return static_cast(m_f(thrust::raw_reference_cast(x), + thrust::raw_reference_cast(y))); } __thrust_exec_check_disable__ - template - inline __host__ __device__ Result operator()(const Argument1 &x, Argument2 &y) const + template + inline __host__ __device__ + Result operator()(const Argument1& x, Argument2& y) const { - // we static cast to Result to handle void Result without error - // in case Function's result is non-void - return static_cast(m_f(thrust::raw_reference_cast(x), thrust::raw_reference_cast(y))); + return static_cast(m_f(thrust::raw_reference_cast(x), + thrust::raw_reference_cast(y))); } __thrust_exec_check_disable__ - template - inline __host__ __device__ Result operator()(const Argument1 &x, const Argument2 &y) const + template + inline __host__ __device__ + Result operator()(const Argument1& x, const Argument2& y) const { - // we static cast to Result to handle void Result without error - // in case Function's result is non-void - return static_cast(m_f(thrust::raw_reference_cast(x), thrust::raw_reference_cast(y))); + return static_cast(m_f(thrust::raw_reference_cast(x), + thrust::raw_reference_cast(y))); } __thrust_exec_check_disable__ - template - inline __host__ __device__ Result operator()(Argument1 &x, const Argument2 &y) const + template + inline __host__ __device__ + Result operator()(Argument1& x, const Argument2& y) const { - // we static cast to Result to handle void Result without error - // in case Function's result is non-void - return static_cast(m_f(thrust::raw_reference_cast(x), thrust::raw_reference_cast(y))); + return static_cast(m_f(thrust::raw_reference_cast(x), + thrust::raw_reference_cast(y))); } }; // end wrapped_function +// Specialize for void return types: +template +struct wrapped_function +{ + // mutable because Function::operator() might be const + mutable Function m_f; + inline __host__ __device__ + wrapped_function() + : m_f() + {} + + inline __host__ __device__ + wrapped_function(const Function& f) + : m_f(f) + {} + + __thrust_exec_check_disable__ + template + inline __host__ __device__ + void operator()(Argument& x) const + { + m_f(thrust::raw_reference_cast(x)); + } -} // end detail -} // end thrust + __thrust_exec_check_disable__ + template + inline __host__ __device__ + void operator()(const Argument& x) const + { + m_f(thrust::raw_reference_cast(x)); + } + + __thrust_exec_check_disable__ + template + inline __host__ __device__ + void operator()(Argument1& x, Argument2& y) const + { + m_f(thrust::raw_reference_cast(x), thrust::raw_reference_cast(y)); + } + + __thrust_exec_check_disable__ + template + inline __host__ __device__ + void operator()(const Argument1& x, Argument2& y) const + { + m_f(thrust::raw_reference_cast(x), thrust::raw_reference_cast(y)); + } + __thrust_exec_check_disable__ + template + inline __host__ __device__ + void operator()(const Argument1& x, const Argument2& y) const + { + m_f(thrust::raw_reference_cast(x), thrust::raw_reference_cast(y)); + } + __thrust_exec_check_disable__ + template + inline __host__ __device__ + void operator()(Argument1& x, const Argument2& y) const + { + m_f(thrust::raw_reference_cast(x), thrust::raw_reference_cast(y)); + } +}; // end wrapped_function +} // namespace detail +} // namespace thrust diff --git a/thrust/detail/internal_functional.h b/thrust/detail/internal_functional.h index 737e75eb4a..7e2d65c1f7 100644 --- a/thrust/detail/internal_functional.h +++ b/thrust/detail/internal_functional.h @@ -23,6 +23,7 @@ #include #include +#include #include #include #include diff --git a/thrust/iterator/detail/zip_iterator_base.h b/thrust/iterator/detail/zip_iterator_base.h index e0d941c8f3..b1603aed4d 100644 --- a/thrust/iterator/detail/zip_iterator_base.h +++ b/thrust/iterator/detail/zip_iterator_base.h @@ -16,6 +16,7 @@ #pragma once +#include #include #include #include @@ -45,12 +46,12 @@ class advance_iterator public: inline __host__ __device__ advance_iterator(DiffType step) : m_step(step) {} - + __thrust_exec_check_disable__ template inline __host__ __device__ void operator()(Iterator& it) const - { it += m_step; } + { thrust::advance(it, m_step); } private: DiffType m_step; diff --git a/thrust/system/tbb/detail/reduce_by_key.inl b/thrust/system/tbb/detail/reduce_by_key.inl index a9516e4a1c..70933f3070 100644 --- a/thrust/system/tbb/detail/reduce_by_key.inl +++ b/thrust/system/tbb/detail/reduce_by_key.inl @@ -27,8 +27,9 @@ #include #include #include -#include + #include +#include namespace thrust @@ -281,7 +282,7 @@ template(1u, ::tbb::tbb_thread::hardware_concurrency()); + const unsigned int p = thrust::max(1u, std::thread::hardware_concurrency()); // generate O(P) intervals of sequential work // XXX oversubscribing is a tuning opportunity diff --git a/thrust/system/tbb/detail/scan.inl b/thrust/system/tbb/detail/scan.inl index 477c04ee38..7b58a9a310 100644 --- a/thrust/system/tbb/detail/scan.inl +++ b/thrust/system/tbb/detail/scan.inl @@ -104,7 +104,12 @@ struct inclusive_body void reverse_join(inclusive_body& b) { - sum = binary_op(b.sum, sum); + // Only accumulate this functor's partial sum if this functor has been + // called at least once -- otherwise we'll over-count the initial value. + if (!first_call) + { + sum = binary_op(b.sum, sum); + } } void assign(inclusive_body& b) @@ -172,8 +177,13 @@ struct exclusive_body void reverse_join(exclusive_body& b) { - sum = binary_op(b.sum, sum); - } + // Only accumulate this functor's partial sum if this functor has been + // called at least once -- otherwise we'll over-count the initial value. + if (!first_call) + { + sum = binary_op(b.sum, sum); + } + } void assign(exclusive_body& b) { @@ -183,8 +193,6 @@ struct exclusive_body } // end scan_detail - - template @@ -194,18 +202,6 @@ template::type - using namespace thrust::detail; typedef typename eval_if< @@ -228,13 +224,12 @@ template(0,n), scan_body); } - + thrust::advance(result, n); return result; } - template::type - using namespace thrust::detail; typedef typename eval_if< @@ -280,7 +263,7 @@ template(0,n), scan_body); } - + thrust::advance(result, n); return result; @@ -290,4 +273,3 @@ template #include #include +#include #include #include