diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt index 3e9e7e76a90..5db32987624 100644 --- a/cpp/benchmarks/CMakeLists.txt +++ b/cpp/benchmarks/CMakeLists.txt @@ -29,7 +29,6 @@ target_link_libraries(cudf_datagen GTest::gmock_main GTest::gtest_main benchmark::benchmark - benchmark::benchmark_main Threads::Threads cudf) @@ -52,7 +51,8 @@ function(ConfigureBench CMAKE_BENCH_NAME) add_executable(${CMAKE_BENCH_NAME} ${ARGN}) set_target_properties(${CMAKE_BENCH_NAME} PROPERTIES RUNTIME_OUTPUT_DIRECTORY "$") - target_link_libraries(${CMAKE_BENCH_NAME} PRIVATE cudf_benchmark_common cudf_datagen) + target_link_libraries(${CMAKE_BENCH_NAME} + PRIVATE cudf_benchmark_common cudf_datagen benchmark::benchmark_main) endfunction() ################################################################################################### diff --git a/cpp/cmake/thirdparty/CUDF_GetRMM.cmake b/cpp/cmake/thirdparty/CUDF_GetRMM.cmake index 66ef3e9b6a3..b4a6a67c24d 100644 --- a/cpp/cmake/thirdparty/CUDF_GetRMM.cmake +++ b/cpp/cmake/thirdparty/CUDF_GetRMM.cmake @@ -14,12 +14,26 @@ # limitations under the License. #============================================================================= -function(find_and_configure_rmm VERSION) +function(cudf_save_if_enabled var) + if(CUDF_${var}) + unset(${var} PARENT_SCOPE) + unset(${var} CACHE) + endif() +endfunction() +function(cudf_restore_if_enabled var) + if(CUDF_${var}) + set(${var} ON CACHE INTERNAL "" FORCE) + endif() +endfunction() + +function(find_and_configure_rmm VERSION) # Consumers have two options for local source builds: # 1. Pass `-D CPM_rmm_SOURCE=/path/to/rmm` to build a local RMM source tree # 2. Pass `-D CMAKE_PREFIX_PATH=/path/to/rmm/build` to use an existing local # RMM build directory as the install location for find_package(rmm) + cudf_save_if_enabled(BUILD_TESTS) + cudf_save_if_enabled(BUILD_BENCHMARKS) CPMFindPackage(NAME rmm VERSION ${VERSION} @@ -32,6 +46,8 @@ function(find_and_configure_rmm VERSION) "CMAKE_CUDA_ARCHITECTURES ${CMAKE_CUDA_ARCHITECTURES}" "DISABLE_DEPRECATION_WARNING ${DISABLE_DEPRECATION_WARNING}" ) + cudf_restore_if_enabled(BUILD_TESTS) + cudf_restore_if_enabled(BUILD_BENCHMARKS) if(NOT rmm_BINARY_DIR IN_LIST CMAKE_PREFIX_PATH) list(APPEND CMAKE_PREFIX_PATH "${rmm_BINARY_DIR}") diff --git a/cpp/include/cudf/column/column_view.hpp b/cpp/include/cudf/column/column_view.hpp index 90823f22775..168db61f672 100644 --- a/cpp/include/cudf/column/column_view.hpp +++ b/cpp/include/cudf/column/column_view.hpp @@ -359,7 +359,7 @@ class column_view : public detail::column_view_base { auto child_end() const noexcept { return _children.cend(); } private: - friend column_view logical_cast(column_view const& input, data_type type); + friend column_view bit_cast(column_view const& input, data_type type); std::vector _children{}; ///< Based on element type, children ///< may contain additional data @@ -550,7 +550,7 @@ class mutable_column_view : public detail::column_view_base { operator column_view() const; private: - friend mutable_column_view logical_cast(mutable_column_view const& input, data_type type); + friend mutable_column_view bit_cast(mutable_column_view const& input, data_type type); std::vector mutable_children; }; @@ -564,47 +564,49 @@ class mutable_column_view : public detail::column_view_base { size_type count_descendants(column_view parent); /** - * @brief Zero-copy cast between types with the same underlying representation. + * @brief Zero-copy cast between types with the same size and compatible underlying representations. * * This is similar to `reinterpret_cast` or `bit_cast` in that it gives a view of the same raw bits * as a different type. Unlike `reinterpret_cast` however, this cast is only allowed on types that - * have the same width and underlying representation. For example, the way timestamp types are laid - * out in memory is equivalent to an integer representing a duration since a fixed epoch; logically - * casting to the same integer type (INT32 for days, INT64 for others) results in a raw view of the - * duration count. However, an INT32 column cannot be logically cast to INT64 as the sizes differ, - * nor can an INT32 columm be logically cast to a FLOAT32 since what the bits represent differs. + * have the same width and compatible representations. For example, the way timestamp types are laid + * out in memory is equivalent to an integer representing a duration since a fixed epoch; + * bit-casting to the same integer type (INT32 for days, INT64 for others) results in a raw view of + * the duration count. A FLOAT32 can also be bit-casted into INT32 and treated as an integer value. + * However, an INT32 column cannot be bit-casted to INT64 as the sizes differ, nor can a string_view + * column be casted into a numeric type column as their data representations are not compatible. * - * The validity of the conversion can be checked with `cudf::is_logically_castable()`. + * The validity of the conversion can be checked with `cudf::is_bit_castable()`. * * @throws cudf::logic_error if the specified cast is not possible, i.e., - * `is_logically_castable(input.type(), type)` is false. + * `is_bit_castable(input.type(), type)` is false. * * @param input The `column_view` to cast from * @param type The `data_type` to cast to * @return New `column_view` wrapping the same data as `input` but cast to `type` */ -column_view logical_cast(column_view const& input, data_type type); +column_view bit_cast(column_view const& input, data_type type); /** - * @brief Zero-copy cast between types with the same underlying representation. + * @brief Zero-copy cast between types with the same size and compatible underlying representations. * * This is similar to `reinterpret_cast` or `bit_cast` in that it gives a view of the same raw bits * as a different type. Unlike `reinterpret_cast` however, this cast is only allowed on types that - * have the same width and underlying representation. For example, the way timestamp types are laid - * out in memory is equivalent to an integer representing a duration since a fixed epoch; logically - * casting to the same integer type (INT32 for days, INT64 for others) results in a raw view of the - * duration count. However, an INT32 column cannot be logically cast to INT64 as the sizes differ, - * nor can an INT32 columm be logically cast to a FLOAT32 since what the bits represent differs. + * have the same width and compatible representations. For example, the way timestamp types are laid + * out in memory is equivalent to an integer representing a duration since a fixed epoch; + * bit-casting to the same integer type (INT32 for days, INT64 for others) results in a raw view of + * the duration count. A FLOAT32 can also be bit-casted into INT32 and treated as an integer value. + * However, an INT32 column cannot be bit-casted to INT64 as the sizes differ, nor can a string_view + * column be casted into a numeric type column as their data representations are not compatible. * - * The validity of the conversion can be checked with `cudf::is_logically_castable()`. + * The validity of the conversion can be checked with `cudf::is_bit_castable()`. * * @throws cudf::logic_error if the specified cast is not possible, i.e., - * `is_logically_castable(input.type(), type)` is false. + * `is_bit_castable(input.type(), type)` is false. * * @param input The `mutable_column_view` to cast from * @param type The `data_type` to cast to * @return New `mutable_column_view` wrapping the same data as `input` but cast to `type` */ -mutable_column_view logical_cast(mutable_column_view const& input, data_type type); +mutable_column_view bit_cast(mutable_column_view const& input, data_type type); } // namespace cudf diff --git a/cpp/include/cudf/detail/utilities/device_operators.cuh b/cpp/include/cudf/detail/utilities/device_operators.cuh index 8250e351a58..806b6b790f3 100644 --- a/cpp/include/cudf/detail/utilities/device_operators.cuh +++ b/cpp/include/cudf/detail/utilities/device_operators.cuh @@ -92,7 +92,7 @@ struct DeviceCount { * values. Also, this char pointer serves as valid device pointer of identity * value for minimum operator on string values. */ -__constant__ char max_string_sentinel[5]{"\xF7\xBF\xBF\xBF"}; +static __constant__ char max_string_sentinel[5]{"\xF7\xBF\xBF\xBF"}; /* @brief binary `min` operator */ struct DeviceMin { diff --git a/cpp/include/cudf/scalar/scalar_factories.hpp b/cpp/include/cudf/scalar/scalar_factories.hpp index 5271bed14c8..a0a0a22091e 100644 --- a/cpp/include/cudf/scalar/scalar_factories.hpp +++ b/cpp/include/cudf/scalar/scalar_factories.hpp @@ -113,8 +113,13 @@ std::unique_ptr make_string_scalar( * @throws std::bad_alloc if device memory allocation fails * * @param type The desired element type + * @param stream CUDA stream used for device memory operations. + * @param mr Device memory resource used to allocate the scalar's `data` and `is_valid` bool. */ -std::unique_ptr make_default_constructed_scalar(data_type type); +std::unique_ptr make_default_constructed_scalar( + data_type type, + rmm::cuda_stream_view stream = rmm::cuda_stream_default, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** * @brief Construct scalar using the given value of fixed width type diff --git a/cpp/include/cudf/utilities/traits.hpp b/cpp/include/cudf/utilities/traits.hpp index 0e89058050d..1153d4f8ff3 100644 --- a/cpp/include/cudf/utilities/traits.hpp +++ b/cpp/include/cudf/utilities/traits.hpp @@ -591,74 +591,57 @@ constexpr inline bool is_nested(data_type type) return cudf::type_dispatcher(type, is_nested_impl{}); } -template -struct is_logically_castable_impl : std::false_type { -}; - -// Allow cast to same type -template -struct is_logically_castable_impl : std::true_type { -}; - -#ifndef MAP_CASTABLE_TYPES -#define MAP_CASTABLE_TYPES(Type1, Type2) \ - template <> \ - struct is_logically_castable_impl : std::true_type { \ - }; \ - template <> \ - struct is_logically_castable_impl : std::true_type { \ - }; -#endif - -// Allow cast between timestamp and integer representation -MAP_CASTABLE_TYPES(cudf::timestamp_D, cudf::timestamp_D::duration::rep); -MAP_CASTABLE_TYPES(cudf::timestamp_s, cudf::timestamp_s::duration::rep); -MAP_CASTABLE_TYPES(cudf::timestamp_ms, cudf::timestamp_ms::duration::rep); -MAP_CASTABLE_TYPES(cudf::timestamp_us, cudf::timestamp_us::duration::rep); -MAP_CASTABLE_TYPES(cudf::timestamp_ns, cudf::timestamp_ns::duration::rep); -// Allow cast between durations and integer representation -MAP_CASTABLE_TYPES(cudf::duration_D, cudf::duration_D::rep); -MAP_CASTABLE_TYPES(cudf::duration_s, cudf::duration_s::rep); -MAP_CASTABLE_TYPES(cudf::duration_ms, cudf::duration_ms::rep); -MAP_CASTABLE_TYPES(cudf::duration_us, cudf::duration_us::rep); -MAP_CASTABLE_TYPES(cudf::duration_ns, cudf::duration_ns::rep); -// Allow cast between decimals and integer representation -MAP_CASTABLE_TYPES(numeric::decimal32, numeric::decimal32::rep); -MAP_CASTABLE_TYPES(numeric::decimal64, numeric::decimal64::rep); - template -struct is_logically_castable_to_impl { - template +struct is_bit_castable_to_impl { + template ()>* = nullptr> + constexpr bool operator()() + { + return false; + } + + template ()>* = nullptr> constexpr bool operator()() { - return is_logically_castable_impl::value; + if (not cuda::std::is_trivially_copyable_v || + not cuda::std::is_trivially_copyable_v) { + return false; + } + constexpr auto from_size = sizeof(cudf::device_storage_type_t); + constexpr auto to_size = sizeof(cudf::device_storage_type_t); + return from_size == to_size; } }; -struct is_logically_castable_from_impl { - template +struct is_bit_castable_from_impl { + template ()>* = nullptr> + constexpr bool operator()(data_type) + { + return false; + } + + template ()>* = nullptr> constexpr bool operator()(data_type to) { - return type_dispatcher(to, is_logically_castable_to_impl{}); + return cudf::type_dispatcher(to, is_bit_castable_to_impl{}); } }; /** - * @brief Indicates whether `from` is logically castable to `to`. + * @brief Indicates whether `from` is bit-castable to `to`. * - * Data types that have the same size and underlying representation, e.g. INT32 and TIMESTAMP_DAYS - * which are both represented as 32-bit integers in memory, are eligible for logical cast. + * This casting is based on std::bit_cast. Data types that have the same size and are trivially + * copyable are eligible for this casting. * - * See `cudf::logical_cast()` which returns a zero-copy `column_view` when casting between - * logically castable types. + * See `cudf::bit_cast()` which returns a zero-copy `column_view` when casting between + * bit-castable types. * * @param from The `data_type` to convert from * @param to The `data_type` to convert to - * @return `true` if the types are logically castable + * @return `true` if the types are castable */ -constexpr bool is_logically_castable(data_type from, data_type to) +constexpr bool is_bit_castable(data_type from, data_type to) { - return type_dispatcher(from, is_logically_castable_from_impl{}, to); + return type_dispatcher(from, is_bit_castable_from_impl{}, to); } template diff --git a/cpp/src/column/column_view.cpp b/cpp/src/column/column_view.cpp index 3e784401be2..186669ae697 100644 --- a/cpp/src/column/column_view.cpp +++ b/cpp/src/column/column_view.cpp @@ -132,9 +132,9 @@ size_type count_descendants(column_view parent) return std::accumulate(begin, begin + parent.num_children(), size_type{parent.num_children()}); } -column_view logical_cast(column_view const& input, data_type type) +column_view bit_cast(column_view const& input, data_type type) { - CUDF_EXPECTS(is_logically_castable(input._type, type), "types are not logically castable"); + CUDF_EXPECTS(is_bit_castable(input._type, type), "types are not bit-castable"); return column_view{type, input._size, input._data, @@ -144,9 +144,9 @@ column_view logical_cast(column_view const& input, data_type type) input._children}; } -mutable_column_view logical_cast(mutable_column_view const& input, data_type type) +mutable_column_view bit_cast(mutable_column_view const& input, data_type type) { - CUDF_EXPECTS(is_logically_castable(input._type, type), "types are not logically castable"); + CUDF_EXPECTS(is_bit_castable(input._type, type), "types are not bit-castable"); return mutable_column_view{type, input._size, const_cast(input._data), diff --git a/cpp/src/copying/get_element.cu b/cpp/src/copying/get_element.cu index fc241dd9e32..446f9b0dda9 100644 --- a/cpp/src/copying/get_element.cu +++ b/cpp/src/copying/get_element.cu @@ -101,7 +101,7 @@ struct get_element_functor { stream); if (!key_index_scalar.is_valid(stream)) { - auto null_result = make_default_constructed_scalar(dict_view.keys().type()); + auto null_result = make_default_constructed_scalar(dict_view.keys().type(), stream, mr); null_result->set_valid(false, stream); return null_result; } diff --git a/cpp/src/filling/fill.cu b/cpp/src/filling/fill.cu index a9d8a61d88f..e7ae51ded9e 100644 --- a/cpp/src/filling/fill.cu +++ b/cpp/src/filling/fill.cu @@ -77,7 +77,7 @@ struct in_place_fill_range_dispatch { auto unscaled = static_cast const&>(value).value(); using RepType = typename T::rep; auto s = cudf::numeric_scalar(unscaled, value.is_valid()); - auto view = cudf::logical_cast(destination, s.type()); + auto view = cudf::bit_cast(destination, s.type()); in_place_fill(view, begin, end, s, stream); } diff --git a/cpp/src/reductions/minmax.cu b/cpp/src/reductions/minmax.cu index 58db90b600d..c99b366c2dd 100644 --- a/cpp/src/reductions/minmax.cu +++ b/cpp/src/reductions/minmax.cu @@ -252,8 +252,8 @@ std::pair, std::unique_ptr> minmax( if (col.null_count() == col.size()) { // this handles empty and all-null columns // return scalars with valid==false - return {make_default_constructed_scalar(col.type()), - make_default_constructed_scalar(col.type())}; + return {make_default_constructed_scalar(col.type(), stream, mr), + make_default_constructed_scalar(col.type(), stream, mr)}; } return type_dispatcher(col.type(), minmax_functor{}, col, stream, mr); diff --git a/cpp/src/reductions/reductions.cpp b/cpp/src/reductions/reductions.cpp index 3c10ac61643..43dd86b307f 100644 --- a/cpp/src/reductions/reductions.cpp +++ b/cpp/src/reductions/reductions.cpp @@ -112,7 +112,7 @@ std::unique_ptr reduce( rmm::cuda_stream_view stream = rmm::cuda_stream_default, rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()) { - std::unique_ptr result = make_default_constructed_scalar(output_dtype); + std::unique_ptr result = make_default_constructed_scalar(output_dtype, stream, mr); result->set_valid(false, stream); // check if input column is empty diff --git a/cpp/src/scalar/scalar_factories.cpp b/cpp/src/scalar/scalar_factories.cpp index 644b320dcd5..5714eaee864 100644 --- a/cpp/src/scalar/scalar_factories.cpp +++ b/cpp/src/scalar/scalar_factories.cpp @@ -100,36 +100,48 @@ std::unique_ptr make_fixed_width_scalar(data_type type, namespace { struct default_scalar_functor { template - std::unique_ptr operator()() + std::unique_ptr operator()(rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { - using ScalarType = scalar_type_t; - return std::unique_ptr(new ScalarType); + return make_fixed_width_scalar(data_type(type_to_id()), stream, mr); } }; template <> -std::unique_ptr default_scalar_functor::operator()() +std::unique_ptr default_scalar_functor::operator()( + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) +{ + return std::unique_ptr(new string_scalar("", false, stream, mr)); +} + +template <> +std::unique_ptr default_scalar_functor::operator()( + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FAIL("dictionary type not supported"); } template <> -std::unique_ptr default_scalar_functor::operator()() +std::unique_ptr default_scalar_functor::operator()( + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FAIL("list_view type not supported"); } template <> -std::unique_ptr default_scalar_functor::operator()() +std::unique_ptr default_scalar_functor::operator()( + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_FAIL("struct_view type not supported"); } } // namespace -std::unique_ptr make_default_constructed_scalar(data_type type) +std::unique_ptr make_default_constructed_scalar(data_type type, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { - return type_dispatcher(type, default_scalar_functor{}); + return type_dispatcher(type, default_scalar_functor{}, stream, mr); } } // namespace cudf diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index ea3dd8560fd..492767c5d2f 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -35,8 +35,6 @@ target_compile_features(cudftestutil PUBLIC cxx_std_14 cuda_std_14) target_link_libraries(cudftestutil PUBLIC GTest::gmock GTest::gtest - GTest::gmock_main - GTest::gtest_main Threads::Threads cudf) @@ -55,7 +53,7 @@ function(ConfigureTest CMAKE_TEST_NAME ) add_executable(${CMAKE_TEST_NAME} ${ARGN}) set_target_properties(${CMAKE_TEST_NAME} PROPERTIES RUNTIME_OUTPUT_DIRECTORY "$") - target_link_libraries(${CMAKE_TEST_NAME} PRIVATE cudftestutil) + target_link_libraries(${CMAKE_TEST_NAME} PRIVATE cudftestutil GTest::gmock_main GTest::gtest_main) add_test(NAME ${CMAKE_TEST_NAME} COMMAND ${CMAKE_TEST_NAME}) endfunction() @@ -66,8 +64,8 @@ endfunction() ################################################################################################### # - column tests ---------------------------------------------------------------------------------- ConfigureTest(COLUMN_TEST + column/bit_cast_test.cpp column/column_test.cu - column/column_view_test.cpp column/column_device_view_test.cu column/compound_test.cu) @@ -300,19 +298,11 @@ ConfigureTest(STREAM_COMPACTION_TEST ################################################################################################### # - rolling tests --------------------------------------------------------------------------------- -ConfigureTest(ROLLING_TEST rolling/rolling_test.cpp) - -################################################################################################### -# - grouped rolling tests ------------------------------------------------------------------------- -ConfigureTest(GROUPED_ROLLING_TEST grouped_rolling/grouped_rolling_test.cpp) - -################################################################################################### -# - lead/lag rolling tests ------------------------------------------------------------------------ -ConfigureTest(LEAD_LAG_TEST lead_lag/lead_lag_test.cpp) - -################################################################################################### -# - collect_list rolling tests -------------------------------------------------------------------- -ConfigureTest(COLLECT_LIST_TEST collect_list/collect_list_test.cpp) +ConfigureTest(ROLLING_TEST + rolling/rolling_test.cpp + rolling/grouped_rolling_test.cpp + rolling/lead_lag_test.cpp + rolling/collect_list_test.cpp) ################################################################################################### # - filling test ---------------------------------------------------------------------------------- diff --git a/cpp/tests/column/bit_cast_test.cpp b/cpp/tests/column/bit_cast_test.cpp new file mode 100644 index 00000000000..038b139951c --- /dev/null +++ b/cpp/tests/column/bit_cast_test.cpp @@ -0,0 +1,134 @@ +/* + * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +#include +#include +#include +#include +#include + +#include + +#include + +template +struct rep_type_impl { + using type = void; +}; + +template +struct rep_type_impl()>> { + using type = typename T::duration::rep; +}; + +template +struct rep_type_impl()>> { + using type = typename T::rep; +}; + +template +struct rep_type_impl()>> { + using type = typename T::rep; +}; + +template +using rep_type_t = typename rep_type_impl::type; + +template +struct ColumnViewAllTypesTests : public cudf::test::BaseFixture { +}; + +TYPED_TEST_CASE(ColumnViewAllTypesTests, cudf::test::FixedWidthTypes); + +template +void do_bit_cast(cudf::column_view const& column_view, Iterator begin, Iterator end) +{ + auto mutable_column_view = reinterpret_cast(column_view); + cudf::data_type to_type{cudf::type_to_id()}; + + if (std::is_same::value) { + // Cast to same to_type + auto output = cudf::bit_cast(column_view, column_view.type()); + auto output1 = cudf::bit_cast(mutable_column_view, mutable_column_view.type()); + cudf::test::expect_columns_equal(output, column_view); + cudf::test::expect_columns_equal(output1, mutable_column_view); + } else if (std::is_same, ToType>::value || + std::is_same>::value) { + // Cast integer to timestamp or vice versa + auto output = cudf::bit_cast(column_view, to_type); + auto output1 = cudf::bit_cast(mutable_column_view, to_type); + cudf::test::fixed_width_column_wrapper expected(begin, end); + cudf::test::expect_columns_equal(output, expected); + cudf::test::expect_columns_equal(output1, expected); + } else { + if (cuda::std::is_trivially_copyable_v && + cuda::std::is_trivially_copyable_v) { + constexpr auto from_size = sizeof(cudf::device_storage_type_t); + constexpr auto to_size = sizeof(cudf::device_storage_type_t); + if (from_size == to_size) { + // Cast from FromType to ToType + auto output1 = cudf::bit_cast(column_view, to_type); + auto output1_mutable = cudf::bit_cast(mutable_column_view, to_type); + + // Cast back from ToType to FromType + cudf::data_type from_type{cudf::type_to_id()}; + auto output2 = cudf::bit_cast(output1, from_type); + auto output2_mutable = cudf::bit_cast(output1_mutable, from_type); + + cudf::test::expect_columns_equal(output2, column_view); + cudf::test::expect_columns_equal(output2_mutable, mutable_column_view); + } else { + // Not allow to cast if sizes are mismatched + EXPECT_THROW(cudf::bit_cast(column_view, to_type), cudf::logic_error); + EXPECT_THROW(cudf::bit_cast(mutable_column_view, to_type), cudf::logic_error); + } + } else { + // Not allow to cast if any of from/to types is not trivially copyable + EXPECT_THROW(cudf::bit_cast(column_view, to_type), cudf::logic_error); + EXPECT_THROW(cudf::bit_cast(mutable_column_view, to_type), cudf::logic_error); + } + } +} + +TYPED_TEST(ColumnViewAllTypesTests, BitCast) +{ + auto begin = thrust::make_counting_iterator(1); + auto end = thrust::make_counting_iterator(16); + + cudf::test::fixed_width_column_wrapper input(begin, end); + + do_bit_cast(input, begin, end); + do_bit_cast(input, begin, end); + do_bit_cast(input, begin, end); + do_bit_cast(input, begin, end); + do_bit_cast(input, begin, end); + do_bit_cast(input, begin, end); + do_bit_cast(input, begin, end); + do_bit_cast(input, begin, end); + do_bit_cast(input, begin, end); + do_bit_cast(input, begin, end); + do_bit_cast(input, begin, end); + do_bit_cast(input, begin, end); + do_bit_cast(input, begin, end); + do_bit_cast(input, begin, end); + do_bit_cast(input, begin, end); + do_bit_cast(input, begin, end); + do_bit_cast(input, begin, end); +} diff --git a/cpp/tests/column/column_view_test.cpp b/cpp/tests/column/column_view_test.cpp deleted file mode 100644 index 566db84f51a..00000000000 --- a/cpp/tests/column/column_view_test.cpp +++ /dev/null @@ -1,112 +0,0 @@ -/* - * Copyright (c) 2019, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -#include - -#include - -template -struct rep_type_impl { - using type = void; -}; - -template -struct rep_type_impl()>> { - using type = typename T::duration::rep; -}; - -template -struct rep_type_impl()>> { - using type = typename T::rep; -}; - -template -struct rep_type_impl()>> { - using type = typename T::rep; -}; - -template -using rep_type_t = typename rep_type_impl::type; - -template -struct ColumnViewAllTypesTests : public cudf::test::BaseFixture { -}; - -TYPED_TEST_CASE(ColumnViewAllTypesTests, cudf::test::FixedWidthTypes); - -template -void do_logical_cast(cudf::column_view const& column_view, Iterator begin, Iterator end) -{ - auto mutable_column_view = reinterpret_cast(column_view); - if (std::is_same::value) { - // Cast to same type - auto output = cudf::logical_cast(column_view, column_view.type()); - auto output1 = cudf::logical_cast(mutable_column_view, mutable_column_view.type()); - cudf::test::expect_columns_equal(output, column_view); - cudf::test::expect_columns_equal(output1, mutable_column_view); - } else if (std::is_same, ToType>::value || - std::is_same>::value) { - // Cast integer to timestamp or vice versa - cudf::data_type type{cudf::type_to_id()}; - auto output = cudf::logical_cast(column_view, type); - auto output1 = cudf::logical_cast(mutable_column_view, type); - cudf::test::fixed_width_column_wrapper expected(begin, end); - cudf::test::expect_columns_equal(output, expected); - cudf::test::expect_columns_equal(output1, expected); - } else { - // Other casts not allowed - cudf::data_type type{cudf::type_to_id()}; - EXPECT_THROW(cudf::logical_cast(column_view, type), cudf::logic_error); - EXPECT_THROW(cudf::logical_cast(mutable_column_view, type), cudf::logic_error); - } -} - -TYPED_TEST(ColumnViewAllTypesTests, LogicalCast) -{ - auto begin = thrust::make_counting_iterator(1); - auto end = thrust::make_counting_iterator(16); - - cudf::test::fixed_width_column_wrapper input(begin, end); - - do_logical_cast(input, begin, end); - do_logical_cast(input, begin, end); - do_logical_cast(input, begin, end); - do_logical_cast(input, begin, end); - do_logical_cast(input, begin, end); - do_logical_cast(input, begin, end); - do_logical_cast(input, begin, end); - do_logical_cast(input, begin, end); - do_logical_cast(input, begin, end); - do_logical_cast(input, begin, end); - do_logical_cast(input, begin, end); - do_logical_cast(input, begin, end); - do_logical_cast(input, begin, end); - do_logical_cast(input, begin, end); - do_logical_cast(input, begin, end); - do_logical_cast(input, begin, end); - do_logical_cast(input, begin, end); -} diff --git a/cpp/tests/hash_map/multimap_test.cu b/cpp/tests/hash_map/multimap_test.cu index 1f1d20eb8ae..7fd69e90efd 100644 --- a/cpp/tests/hash_map/multimap_test.cu +++ b/cpp/tests/hash_map/multimap_test.cu @@ -15,6 +15,7 @@ */ #include +#include #include @@ -46,11 +47,15 @@ class MultimapTest : public cudf::test::BaseFixture { using value_type = typename T::value_type; using size_type = int; - using multimap_type = concurrent_unordered_multimap::max(), - std::numeric_limits::max()>; + using multimap_type = + concurrent_unordered_multimap::max(), + std::numeric_limits::max(), + default_hash, + equal_to, + default_allocator>>; std::unique_ptr> the_map; @@ -91,12 +96,3 @@ TYPED_TEST(MultimapTest, InitialState) auto end = this->the_map->end(); EXPECT_NE(begin, end); } - -TYPED_TEST(MultimapTest, CheckUnusedValues) -{ - EXPECT_EQ(this->the_map->get_unused_key(), this->unused_key); - - auto begin = this->the_map->begin(); - EXPECT_EQ(begin->first, this->unused_key); - EXPECT_EQ(begin->second, this->unused_value); -} diff --git a/cpp/tests/collect_list/collect_list_test.cpp b/cpp/tests/rolling/collect_list_test.cpp similarity index 99% rename from cpp/tests/collect_list/collect_list_test.cpp rename to cpp/tests/rolling/collect_list_test.cpp index 2e48ff02183..6a3a80601d0 100644 --- a/cpp/tests/collect_list/collect_list_test.cpp +++ b/cpp/tests/rolling/collect_list_test.cpp @@ -1230,5 +1230,3 @@ TYPED_TEST(TypedCollectListTest, GroupedTimeRangeRollingWindowOnStructsWithMinPe CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_result->view(), result_with_nulls_excluded->view()); } - -CUDF_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/grouped_rolling/grouped_rolling_test.cpp b/cpp/tests/rolling/grouped_rolling_test.cpp similarity index 99% rename from cpp/tests/grouped_rolling/grouped_rolling_test.cpp rename to cpp/tests/rolling/grouped_rolling_test.cpp index a1de3cb3638..6f930f99b50 100644 --- a/cpp/tests/grouped_rolling/grouped_rolling_test.cpp +++ b/cpp/tests/rolling/grouped_rolling_test.cpp @@ -2387,4 +2387,3 @@ TYPED_TEST(TypedUnboundedWindowTest, UnboundedPrecedingAndFollowingWindowMultiGr fixed_width_column_wrapper{ {3, 3, 3, 3, 3, 4, 4, 4, 4, 4}, {1, 1, 1, 1, 1, 1, 1, 1, 1, 1}}); } -CUDF_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/lead_lag/lead_lag_test.cpp b/cpp/tests/rolling/lead_lag_test.cpp similarity index 99% rename from cpp/tests/lead_lag/lead_lag_test.cpp rename to cpp/tests/rolling/lead_lag_test.cpp index 749475a9fc3..1cf7d74285c 100644 --- a/cpp/tests/lead_lag/lead_lag_test.cpp +++ b/cpp/tests/rolling/lead_lag_test.cpp @@ -541,5 +541,3 @@ TEST_F(LeadLagWindowTest, LeadLagWithoutFixedWidthInput) cudf::make_lead_aggregation(4)), cudf::logic_error); } - -CUDF_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/scalar/factories_test.cpp b/cpp/tests/scalar/factories_test.cpp index c91fb6b3b5e..fd0a92a0168 100644 --- a/cpp/tests/scalar/factories_test.cpp +++ b/cpp/tests/scalar/factories_test.cpp @@ -89,7 +89,7 @@ TYPED_TEST(TimestampScalarFactory, TypeCast) } template -struct DefaultScalarFactory : public cudf::test::BaseFixture { +struct DefaultScalarFactory : public ScalarFactoryTest { static constexpr auto factory = cudf::make_default_constructed_scalar; }; @@ -98,7 +98,8 @@ TYPED_TEST_CASE(DefaultScalarFactory, MixedTypes); TYPED_TEST(DefaultScalarFactory, FactoryDefault) { - std::unique_ptr s = this->factory(cudf::data_type{cudf::type_to_id()}); + std::unique_ptr s = + this->factory(cudf::data_type{cudf::type_to_id()}, this->stream(), this->mr()); EXPECT_EQ(s->type(), cudf::data_type{cudf::type_to_id()}); EXPECT_FALSE(s->is_valid()); @@ -106,7 +107,8 @@ TYPED_TEST(DefaultScalarFactory, FactoryDefault) TYPED_TEST(DefaultScalarFactory, TypeCast) { - std::unique_ptr s = this->factory(cudf::data_type{cudf::type_to_id()}); + std::unique_ptr s = + this->factory(cudf::data_type{cudf::type_to_id()}, this->stream(), this->mr()); auto numeric_s = static_cast*>(s.get()); diff --git a/java/src/main/java/ai/rapids/cudf/ColumnView.java b/java/src/main/java/ai/rapids/cudf/ColumnView.java index 1dce52f7105..331c5b08764 100644 --- a/java/src/main/java/ai/rapids/cudf/ColumnView.java +++ b/java/src/main/java/ai/rapids/cudf/ColumnView.java @@ -1106,9 +1106,7 @@ public Scalar any(DType outType) { * Returns a boolean scalar that is true if all of the elements in * the column are true or non-zero otherwise false. * Null values are skipped. - * @deprecated the only output type supported is BOOL8. */ - @Deprecated public Scalar all() { return all(DType.BOOL8); } @@ -1118,7 +1116,9 @@ public Scalar all() { * if all of the elements in the column are true or non-zero * otherwise false or 0. * Null values are skipped. + * @deprecated the only output type supported is BOOL8. */ + @Deprecated public Scalar all(DType outType) { return reduce(Aggregation.all(), outType); } @@ -1304,9 +1304,24 @@ public ColumnVector castTo(DType type) { * types must match. * @param type the type you want to go to. * @return a ColumnView that cannot outlive the Column that owns the actual data it points to. + * @deprecated this has changed to bit_cast in C++ so use that name instead */ + @Deprecated public ColumnView logicalCastTo(DType type) { - return new ColumnView(logicalCastTo(getNativeView(), + return bitCastTo(type); + } + + /** + * Zero-copy cast between types with the same underlying length. + * + * Similar to bit_cast in C++. This will take the underlying data and create new metadata + * so it is interpreted as a new type. Not all types are supported the width of the + * types must match. + * @param type the type you want to go to. + * @return a ColumnView that cannot outlive the Column that owns the actual data it points to. + */ + public ColumnView bitCastTo(DType type) { + return new ColumnView(bitCastTo(getNativeView(), type.typeId.getNativeId(), type.getScale())); } @@ -2607,7 +2622,7 @@ private static native long stringReplaceWithBackrefs(long columnView, String pat private static native long castTo(long nativeHandle, int type, int scale); - private static native long logicalCastTo(long nativeHandle, int type, int scale); + private static native long bitCastTo(long nativeHandle, int type, int scale); private static native long byteListCast(long nativeHandle, boolean config); diff --git a/java/src/main/native/src/ColumnViewJni.cpp b/java/src/main/native/src/ColumnViewJni.cpp index 47aa30e5d31..a0613f9b73f 100644 --- a/java/src/main/native/src/ColumnViewJni.cpp +++ b/java/src/main/native/src/ColumnViewJni.cpp @@ -760,16 +760,16 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_castTo(JNIEnv *env, jclas CATCH_STD(env, 0); } -JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_logicalCastTo(JNIEnv *env, jclass, - jlong handle, jint type, - jint scale) { +JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_bitCastTo(JNIEnv *env, jclass, + jlong handle, jint type, + jint scale) { JNI_NULL_CHECK(env, handle, "native handle is null", 0); try { cudf::jni::auto_set_device(env); cudf::column_view *column = reinterpret_cast(handle); cudf::data_type n_data_type = cudf::jni::make_data_type(type, scale); std::unique_ptr result = std::make_unique(); - *result = cudf::logical_cast(*column, n_data_type); + *result = cudf::bit_cast(*column, n_data_type); return reinterpret_cast(result.release()); } CATCH_STD(env, 0); diff --git a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java index 9dba4c4c184..a3500ae86ef 100644 --- a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java +++ b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java @@ -2061,10 +2061,10 @@ void emptyStringColumnFindReplaceAll() { } @Test - void testLogicalCast() { + void testBitCast() { try (ColumnVector cv = ColumnVector.decimalFromLongs(-2, 1L, 2L, 100L, 552L); ColumnVector expected = ColumnVector.fromLongs(1L, 2L, 100L, 552L); - ColumnView casted = cv.logicalCastTo(DType.INT64)) { + ColumnView casted = cv.bitCastTo(DType.INT64)) { assertColumnsAreEqual(expected, casted); } } diff --git a/python/cudf/cudf/core/_compat.py b/python/cudf/cudf/core/_compat.py index 0fedfcabb46..807e96f2c38 100644 --- a/python/cudf/cudf/core/_compat.py +++ b/python/cudf/cudf/core/_compat.py @@ -7,3 +7,4 @@ PANDAS_GE_100 = PANDAS_VERSION >= version.parse("1.0") PANDAS_GE_110 = PANDAS_VERSION >= version.parse("1.1") PANDAS_GE_120 = PANDAS_VERSION >= version.parse("1.2") +PANDAS_EQ_123 = PANDAS_VERSION == version.parse("1.2.3") diff --git a/python/cudf/cudf/tests/test_setitem.py b/python/cudf/cudf/tests/test_setitem.py index fc885a13808..4d2e2a4b33b 100644 --- a/python/cudf/cudf/tests/test_setitem.py +++ b/python/cudf/cudf/tests/test_setitem.py @@ -5,7 +5,7 @@ import pytest import cudf -from cudf.core._compat import PANDAS_GE_120 +from cudf.core._compat import PANDAS_EQ_123, PANDAS_GE_120 from cudf.tests.utils import assert_eq, assert_exceptions_equal @@ -21,9 +21,8 @@ def test_dataframe_setitem_bool_mask_scaler(df, arg, value): @pytest.mark.xfail( - condition=not PANDAS_GE_120, - reason="pandas incorrectly adds nulls with dataframes " - "but works fine with scalars", + condition=PANDAS_EQ_123 or not PANDAS_GE_120, + reason="https://github.com/pandas-dev/pandas/issues/40204", ) def test_dataframe_setitem_scaler_bool(): df = pd.DataFrame({"a": [1, 2, 3]})