diff --git a/build.sh b/build.sh index 5eb404d02a8..d75053f8849 100755 --- a/build.sh +++ b/build.sh @@ -135,7 +135,7 @@ if hasArg clean; then fi if (( ${BUILD_ALL_GPU_ARCH} == 0 )); then - CUDF_CMAKE_CUDA_ARCHITECTURES="-DCMAKE_CUDA_ARCHITECTURES=ALL" + CUDF_CMAKE_CUDA_ARCHITECTURES="-DCMAKE_CUDA_ARCHITECTURES=" echo "Building for the architecture of the GPU in the system..." else CUDF_CMAKE_CUDA_ARCHITECTURES="" diff --git a/conda/recipes/libcudf/meta.yaml b/conda/recipes/libcudf/meta.yaml index e709824721c..5657d21889f 100644 --- a/conda/recipes/libcudf/meta.yaml +++ b/conda/recipes/libcudf/meta.yaml @@ -134,6 +134,7 @@ test: - test -f $PREFIX/include/cudf/lists/detail/copying.hpp - test -f $PREFIX/include/cudf/lists/detail/sorting.hpp - test -f $PREFIX/include/cudf/lists/count_elements.hpp + - test -f $PREFIX/include/cudf/lists/explode.hpp - test -f $PREFIX/include/cudf/lists/drop_list_duplicates.hpp - test -f $PREFIX/include/cudf/lists/extract.hpp - test -f $PREFIX/include/cudf/lists/contains.hpp diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 2a51ad5e55a..f15fd649b83 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -117,6 +117,8 @@ include(cmake/Modules/ConfigureCUDA.cmake) # find zlib find_package(ZLIB REQUIRED) +# find Threads (needed by cudftestutil) +find_package(Threads REQUIRED) # add third party dependencies using CPM include(cmake/thirdparty/CUDF_GetCPM.cmake) # find boost @@ -133,8 +135,12 @@ include(cmake/thirdparty/CUDF_GetArrow.cmake) include(cmake/thirdparty/CUDF_GetDLPack.cmake) # find libcu++ include(cmake/thirdparty/CUDF_GetLibcudacxx.cmake) +# find or install GoogleTest +include(cmake/thirdparty/CUDF_GetGTest.cmake) # Stringify libcudf and libcudacxx headers used in JIT operations include(cmake/Modules/StringifyJITHeaders.cmake) +# find cuFile +include(cmake/Modules/FindcuFile.cmake) ################################################################################################### # - library targets ------------------------------------------------------------------------------- @@ -240,6 +246,7 @@ add_library(cudf src/io/statistics/column_stats.cu src/io/utilities/data_sink.cpp src/io/utilities/datasource.cpp + src/io/utilities/file_io_utilities.cpp src/io/utilities/parsing_utils.cu src/io/utilities/type_conversion.cpp src/jit/cache.cpp @@ -256,6 +263,7 @@ add_library(cudf src/lists/copying/gather.cu src/lists/copying/segmented_gather.cu src/lists/count_elements.cu + src/lists/explode.cu src/lists/extract.cu src/lists/drop_list_duplicates.cu src/lists/lists_column_factories.cu @@ -285,7 +293,6 @@ add_library(cudf src/replace/nulls.cu src/replace/replace.cu src/reshape/byte_cast.cu - src/reshape/explode.cu src/reshape/interleave_columns.cu src/reshape/tile.cu src/rolling/grouped_rolling.cu @@ -417,7 +424,8 @@ target_include_directories(cudf "$" "$" PRIVATE "$" - INTERFACE "$") + INTERFACE "$" + "$") # Add Conda library paths if specified if(CONDA_LINK_DIRS) @@ -464,6 +472,11 @@ else() target_link_libraries(cudf PUBLIC CUDA::nvrtc CUDA::cudart CUDA::cuda_driver) endif() +# Add cuFile interface if available +if(TARGET cuFile::cuFile_interface) + target_link_libraries(cudf PRIVATE cuFile::cuFile_interface) +endif() + file(WRITE "${CUDF_BINARY_DIR}/fatbin.ld" [=[ SECTIONS @@ -480,34 +493,37 @@ add_library(cudf::cudf ALIAS cudf) # - tests and benchmarks -------------------------------------------------------------------------- ################################################################################################### -if (CUDF_BUILD_TESTS OR 
CUDF_BUILD_BENCHMARKS) - # Find or install GoogleTest - CPMFindPackage(NAME GTest - VERSION 1.10.0 - GIT_REPOSITORY https://github.com/google/googletest.git - GIT_TAG release-1.10.0 - GIT_SHALLOW TRUE - OPTIONS "INSTALL_GTEST OFF" - # googletest >= 1.10.0 provides a cmake config file -- use it if it exists - FIND_PACKAGE_ARGUMENTS "CONFIG") - # Add GTest aliases if they don't already exist. - # Assumes if GTest::gtest doesn't exist, the others don't either. - # TODO: Is this always a valid assumption? - if(NOT TARGET GTest::gtest) - add_library(GTest::gtest ALIAS gtest) - add_library(GTest::gmock ALIAS gmock) - add_library(GTest::gtest_main ALIAS gtest_main) - add_library(GTest::gmock_main ALIAS gmock_main) - endif() - if(GTest_ADDED) - install(TARGETS gmock - gtest - gmock_main - gtest_main - DESTINATION lib - EXPORT cudf-targets) - endif() -endif() +################################################################################################### +# - build cudftestutil ---------------------------------------------------------------------------- + +add_library(cudftestutil STATIC + tests/utilities/base_fixture.cpp + tests/utilities/column_utilities.cu + tests/utilities/table_utilities.cu + tests/strings/utilities.cu) + +target_compile_options(cudftestutil + PUBLIC "$<$:${CUDF_CXX_FLAGS}>" + "$<$:${CUDF_CUDA_FLAGS}>" +) + +target_compile_features(cudftestutil PUBLIC cxx_std_14 cuda_std_14) + +target_link_libraries(cudftestutil + PUBLIC GTest::gmock + GTest::gtest + Threads::Threads + cudf) + +target_include_directories(cudftestutil + PUBLIC "$" + "$") + +install(TARGETS cudftestutil + DESTINATION lib + EXPORT cudf-targets) + +add_library(cudf::cudftestutil ALIAS cudftestutil) ################################################################################################### # - add tests ------------------------------------------------------------------------------------- diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt index 9e11f4df075..cd083ebec7a 100644 --- a/cpp/benchmarks/CMakeLists.txt +++ b/cpp/benchmarks/CMakeLists.txt @@ -185,9 +185,11 @@ ConfigureBench(STRINGS_BENCH string/convert_floats_benchmark.cpp string/copy_benchmark.cpp string/extract_benchmark.cpp + string/factory_benchmark.cu string/filter_benchmark.cpp string/find_benchmark.cpp string/replace_benchmark.cpp string/replace_re_benchmark.cpp string/split_benchmark.cpp + string/substring_benchmark.cpp string/url_decode_benchmark.cpp) diff --git a/cpp/benchmarks/fixture/benchmark_fixture.hpp b/cpp/benchmarks/fixture/benchmark_fixture.hpp index ad2ce095b6e..dd1bbcba0b4 100644 --- a/cpp/benchmarks/fixture/benchmark_fixture.hpp +++ b/cpp/benchmarks/fixture/benchmark_fixture.hpp @@ -88,4 +88,4 @@ class benchmark : public ::benchmark::Fixture { std::shared_ptr mr; }; -}; // namespace cudf +} // namespace cudf diff --git a/cpp/benchmarks/string/factory_benchmark.cu b/cpp/benchmarks/string/factory_benchmark.cu new file mode 100644 index 00000000000..6c5dceffaa8 --- /dev/null +++ b/cpp/benchmarks/string/factory_benchmark.cu @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "string_bench_args.hpp" + +#include +#include +#include +#include + +#include +#include +#include + +#include + +#include +#include + +#include + +namespace { +using string_pair = thrust::pair; +struct string_view_to_pair { + __device__ string_pair operator()(thrust::pair const& p) + { + return (p.second) ? string_pair{p.first.data(), p.first.size_bytes()} : string_pair{nullptr, 0}; + } +}; +} // namespace + +class StringsFactory : public cudf::benchmark { +}; + +static void BM_factory(benchmark::State& state) +{ + cudf::size_type const n_rows{static_cast(state.range(0))}; + cudf::size_type const max_str_length{static_cast(state.range(1))}; + data_profile table_profile; + table_profile.set_distribution_params( + cudf::type_id::STRING, distribution_id::NORMAL, 0, max_str_length); + auto const table = + create_random_table({cudf::type_id::STRING}, 1, row_count{n_rows}, table_profile); + auto d_column = cudf::column_device_view::create(table->view().column(0)); + rmm::device_vector pairs(d_column->size()); + thrust::transform(thrust::device, + d_column->pair_begin(), + d_column->pair_end(), + pairs.data(), + string_view_to_pair{}); + + for (auto _ : state) { + cuda_event_timer raii(state, true, 0); + cudf::make_strings_column(pairs); + } + + cudf::strings_column_view input(table->view().column(0)); + state.SetBytesProcessed(state.iterations() * input.chars_size()); +} + +static void generate_bench_args(benchmark::internal::Benchmark* b) +{ + int const min_rows = 1 << 12; + int const max_rows = 1 << 24; + int const row_mult = 8; + int const min_rowlen = 1 << 5; + int const max_rowlen = 1 << 13; + int const len_mult = 4; + generate_string_bench_args(b, min_rows, max_rows, row_mult, min_rowlen, max_rowlen, len_mult); +} + +#define STRINGS_BENCHMARK_DEFINE(name) \ + BENCHMARK_DEFINE_F(StringsFactory, name) \ + (::benchmark::State & st) { BM_factory(st); } \ + BENCHMARK_REGISTER_F(StringsFactory, name) \ + ->Apply(generate_bench_args) \ + ->UseManualTime() \ + ->Unit(benchmark::kMillisecond); + +STRINGS_BENCHMARK_DEFINE(factory) diff --git a/cpp/benchmarks/string/string_bench_args.hpp b/cpp/benchmarks/string/string_bench_args.hpp index f81f859de74..9c709b064dd 100644 --- a/cpp/benchmarks/string/string_bench_args.hpp +++ b/cpp/benchmarks/string/string_bench_args.hpp @@ -17,6 +17,8 @@ #include +#include + /** * @brief Generate row count and row length argument ranges for a string benchmark. * diff --git a/cpp/benchmarks/string/substring_benchmark.cpp b/cpp/benchmarks/string/substring_benchmark.cpp new file mode 100644 index 00000000000..d47c42e45be --- /dev/null +++ b/cpp/benchmarks/string/substring_benchmark.cpp @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "string_bench_args.hpp" + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include + +#include + +class StringSubstring : public cudf::benchmark { +}; + +enum substring_type { position, multi_position, delimiter, multi_delimiter }; + +static void BM_substring(benchmark::State& state, substring_type rt) +{ + cudf::size_type const n_rows{static_cast(state.range(0))}; + cudf::size_type const max_str_length{static_cast(state.range(1))}; + data_profile table_profile; + table_profile.set_distribution_params( + cudf::type_id::STRING, distribution_id::NORMAL, 0, max_str_length); + auto const table = + create_random_table({cudf::type_id::STRING}, 1, row_count{n_rows}, table_profile); + cudf::strings_column_view input(table->view().column(0)); + auto starts_itr = thrust::constant_iterator(1); + auto stops_itr = thrust::constant_iterator(max_str_length / 2); + cudf::test::fixed_width_column_wrapper starts(starts_itr, starts_itr + n_rows); + cudf::test::fixed_width_column_wrapper stops(stops_itr, stops_itr + n_rows); + auto delim_itr = thrust::constant_iterator(" "); + cudf::test::strings_column_wrapper delimiters(delim_itr, delim_itr + n_rows); + + for (auto _ : state) { + cuda_event_timer raii(state, true, 0); + switch (rt) { + case position: cudf::strings::slice_strings(input, 1, max_str_length / 2); break; + case multi_position: cudf::strings::slice_strings(input, starts, stops); break; + case delimiter: cudf::strings::slice_strings(input, std::string{" "}, 1); break; + case multi_delimiter: + cudf::strings::slice_strings(input, cudf::strings_column_view(delimiters), 1); + break; + } + } + + state.SetBytesProcessed(state.iterations() * input.chars_size()); +} + +static void generate_bench_args(benchmark::internal::Benchmark* b) +{ + int const min_rows = 1 << 12; + int const max_rows = 1 << 24; + int const row_mult = 8; + int const min_rowlen = 1 << 5; + int const max_rowlen = 1 << 13; + int const len_mult = 4; + generate_string_bench_args(b, min_rows, max_rows, row_mult, min_rowlen, max_rowlen, len_mult); +} + +#define STRINGS_BENCHMARK_DEFINE(name) \ + BENCHMARK_DEFINE_F(StringSubstring, name) \ + (::benchmark::State & st) { BM_substring(st, substring_type::name); } \ + BENCHMARK_REGISTER_F(StringSubstring, name) \ + ->Apply(generate_bench_args) \ + ->UseManualTime() \ + ->Unit(benchmark::kMillisecond); + +STRINGS_BENCHMARK_DEFINE(position) +STRINGS_BENCHMARK_DEFINE(multi_position) +STRINGS_BENCHMARK_DEFINE(delimiter) +STRINGS_BENCHMARK_DEFINE(multi_delimiter) diff --git a/cpp/cmake/Modules/ConfigureCUDA.cmake b/cpp/cmake/Modules/ConfigureCUDA.cmake index 44699a13206..d4be6e65021 100644 --- a/cpp/cmake/Modules/ConfigureCUDA.cmake +++ b/cpp/cmake/Modules/ConfigureCUDA.cmake @@ -17,26 +17,16 @@ # Find the CUDAToolkit find_package(CUDAToolkit REQUIRED) -# Must come after find_package(CUDAToolkit) because we symlink -# ccache as a compiler front-end for nvcc in gpuCI CPU builds. -enable_language(CUDA) - -if(CMAKE_CUDA_COMPILER_VERSION) - # Compute the version. 
from CMAKE_CUDA_COMPILER_VERSION - string(REGEX REPLACE "([0-9]+)\\.([0-9]+).*" "\\1" CUDA_VERSION_MAJOR ${CMAKE_CUDA_COMPILER_VERSION}) - string(REGEX REPLACE "([0-9]+)\\.([0-9]+).*" "\\2" CUDA_VERSION_MINOR ${CMAKE_CUDA_COMPILER_VERSION}) - set(CUDA_VERSION "${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR}") -endif() - -message(VERBOSE "CUDF: CUDA_VERSION_MAJOR: ${CUDA_VERSION_MAJOR}") -message(VERBOSE "CUDF: CUDA_VERSION_MINOR: ${CUDA_VERSION_MINOR}") -message(STATUS "CUDF: CUDA_VERSION: ${CUDA_VERSION}") - # Auto-detect available GPU compute architectures - include(${CUDF_SOURCE_DIR}/cmake/Modules/SetGPUArchs.cmake) message(STATUS "CUDF: Building CUDF for GPU architectures: ${CMAKE_CUDA_ARCHITECTURES}") +# Must come after find_package(CUDAToolkit) because we symlink +# ccache as a compiler front-end for nvcc in gpuCI CPU builds. +# Must also come after we detect and potentially rewrite +# CMAKE_CUDA_ARCHITECTURES +enable_language(CUDA) + if(CMAKE_COMPILER_IS_GNUCXX) list(APPEND CUDF_CXX_FLAGS -Wall -Werror -Wno-unknown-pragmas -Wno-error=deprecated-declarations) if(CUDF_BUILD_TESTS OR CUDF_BUILD_BENCHMARKS) diff --git a/cpp/cmake/Modules/EvalGPUArchs.cmake b/cpp/cmake/Modules/EvalGPUArchs.cmake index 6c747a0b867..09e42c6cc7a 100644 --- a/cpp/cmake/Modules/EvalGPUArchs.cmake +++ b/cpp/cmake/Modules/EvalGPUArchs.cmake @@ -14,12 +14,21 @@ # limitations under the License. #============================================================================= +# Unset this first in case it's set to +set(CMAKE_CUDA_ARCHITECTURES OFF) + +# Enable CUDA so we can invoke nvcc +enable_language(CUDA) + +# Function uses the CUDA runtime API to query the compute capability of the device, so if a user +# doesn't pass any architecture options to CMake we only build the current architecture function(evaluate_gpu_archs gpu_archs) set(eval_file ${PROJECT_BINARY_DIR}/eval_gpu_archs.cu) set(eval_exe ${PROJECT_BINARY_DIR}/eval_gpu_archs) set(error_file ${PROJECT_BINARY_DIR}/eval_gpu_archs.stderr.log) - file(WRITE ${eval_file} -[=[ + file( + WRITE ${eval_file} + " #include #include #include @@ -32,32 +41,30 @@ int main(int argc, char** argv) { char buff[32]; cudaDeviceProp prop; if(cudaGetDeviceProperties(&prop, dev) != cudaSuccess) continue; - sprintf(buff, "%d%d", prop.major, prop.minor); + sprintf(buff, \"%d%d\", prop.major, prop.minor); archs.insert(buff); } } if(archs.empty()) { - printf("ALL"); + printf(\"${SUPPORTED_CUDA_ARCHITECTURES}\"); } else { bool first = true; for(const auto& arch : archs) { - printf(first? "%s" : ";%s", arch.c_str()); + printf(first? 
\"%s\" : \";%s\", arch.c_str()); first = false; } } - printf("\n"); + printf(\"\\n\"); return 0; } -]=]) +") execute_process( - COMMAND ${CMAKE_CUDA_COMPILER} - -std=c++11 - -o ${eval_exe} - --run - ${eval_file} + COMMAND ${CMAKE_CUDA_COMPILER} -std=c++11 -o ${eval_exe} --run ${eval_file} OUTPUT_VARIABLE __gpu_archs OUTPUT_STRIP_TRAILING_WHITESPACE ERROR_FILE ${error_file}) - message(VERBOSE "CUDF: Auto detection of gpu-archs: ${__gpu_archs}") - set(${gpu_archs} ${__gpu_archs} PARENT_SCOPE) -endfunction() + message(STATUS "CUDF: Auto detection of gpu-archs: ${__gpu_archs}") + set(${gpu_archs} + ${__gpu_archs} + PARENT_SCOPE) +endfunction(evaluate_gpu_archs) diff --git a/cpp/cmake/Modules/FindcuFile.cmake b/cpp/cmake/Modules/FindcuFile.cmake index e67b79d9d60..4f67e186f42 100644 --- a/cpp/cmake/Modules/FindcuFile.cmake +++ b/cpp/cmake/Modules/FindcuFile.cmake @@ -93,6 +93,12 @@ find_package_handle_standard_args(cuFile cuFile_VERSION ) +if (cuFile_INCLUDE_DIR AND NOT TARGET cuFile::cuFile_interface) + add_library(cuFile::cuFile_interface IMPORTED INTERFACE) + target_include_directories(cuFile::cuFile_interface INTERFACE "$") + target_compile_options(cuFile::cuFile_interface INTERFACE "${cuFile_COMPILE_OPTIONS}") + target_compile_definitions(cuFile::cuFile_interface INTERFACE CUFILE_FOUND) +endif () if (cuFile_FOUND AND NOT TARGET cuFile::cuFile) add_library(cuFile::cuFile UNKNOWN IMPORTED) diff --git a/cpp/cmake/Modules/SetGPUArchs.cmake b/cpp/cmake/Modules/SetGPUArchs.cmake index 396023ee9a9..61e4e6bc198 100644 --- a/cpp/cmake/Modules/SetGPUArchs.cmake +++ b/cpp/cmake/Modules/SetGPUArchs.cmake @@ -25,35 +25,41 @@ else() list(REMOVE_ITEM SUPPORTED_CUDA_ARCHITECTURES "62" "72") endif() -if(CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 11) +# CMake < 3.20 has a bug in FindCUDAToolkit where it won't properly detect the CUDAToolkit version +# when find_package(CUDAToolkit) occurs before enable_language(CUDA) +if(NOT DEFINED CUDAToolkit_VERSION AND CMAKE_CUDA_COMPILER) + execute_process(COMMAND ${CMAKE_CUDA_COMPILER} "--version" OUTPUT_VARIABLE NVCC_OUT) + if(NVCC_OUT MATCHES [=[ V([0-9]+)\.([0-9]+)\.([0-9]+)]=]) + set(CUDAToolkit_VERSION_MAJOR "${CMAKE_MATCH_1}") + set(CUDAToolkit_VERSION_MINOR "${CMAKE_MATCH_2}") + set(CUDAToolkit_VERSION_PATCH "${CMAKE_MATCH_3}") + set(CUDAToolkit_VERSION "${CMAKE_MATCH_1}.${CMAKE_MATCH_2}.${CMAKE_MATCH_3}") + endif() + unset(NVCC_OUT) +endif() + +if(CUDAToolkit_VERSION_MAJOR LESS 11) list(REMOVE_ITEM SUPPORTED_CUDA_ARCHITECTURES "80") endif() -if(CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 10) +if(CUDAToolkit_VERSION_MAJOR LESS 10) list(REMOVE_ITEM SUPPORTED_CUDA_ARCHITECTURES "75") endif() -if(CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 9) +if(CUDAToolkit_VERSION_MAJOR LESS 9) list(REMOVE_ITEM SUPPORTED_CUDA_ARCHITECTURES "70") endif() -if(CUDF_BUILD_FOR_DETECTED_ARCHS) - include(${CUDF_SOURCE_DIR}/cmake/Modules/EvalGPUArchs.cmake) - evaluate_gpu_archs(CMAKE_CUDA_ARCHITECTURES) - if(CMAKE_CUDA_ARCHITECTURES STREQUAL "ALL") - unset(CMAKE_CUDA_ARCHITECTURES CACHE) - set(CUDF_BUILD_FOR_ALL_ARCHS TRUE) - else() - set(CUDF_BUILD_FOR_ALL_ARCHS FALSE) - list(TRANSFORM CMAKE_CUDA_ARCHITECTURES APPEND "-real") - endif() -endif() - -if(CUDF_BUILD_FOR_ALL_ARCHS) +if(${PROJECT_NAME}_BUILD_FOR_ALL_ARCHS) set(CMAKE_CUDA_ARCHITECTURES ${SUPPORTED_CUDA_ARCHITECTURES}) - # CMake architecture list entry of "80" means to build compute and sm. - # What we want is for the newest arch only to build that way - # while the rest built only for sm. 
- list(SORT CMAKE_CUDA_ARCHITECTURES ORDER ASCENDING) + + # CMake architecture list entry of "80" means to build compute and sm. What we want is for the + # newest arch only to build that way while the rest built only for sm. list(POP_BACK CMAKE_CUDA_ARCHITECTURES latest_arch) list(TRANSFORM CMAKE_CUDA_ARCHITECTURES APPEND "-real") list(APPEND CMAKE_CUDA_ARCHITECTURES ${latest_arch}) + +elseif(${PROJECT_NAME}_BUILD_FOR_DETECTED_ARCHS) + include(${PROJECT_SOURCE_DIR}/cmake/Modules/EvalGPUArchs.cmake) + evaluate_gpu_archs(CMAKE_CUDA_ARCHITECTURES) + + list(TRANSFORM CMAKE_CUDA_ARCHITECTURES APPEND "-real") endif() diff --git a/cpp/cmake/Modules/StringifyJITHeaders.cmake b/cpp/cmake/Modules/StringifyJITHeaders.cmake index 36752d7f715..0bfb37773dc 100644 --- a/cpp/cmake/Modules/StringifyJITHeaders.cmake +++ b/cpp/cmake/Modules/StringifyJITHeaders.cmake @@ -164,5 +164,5 @@ add_custom_target(stringify_run DEPENDS # - copy libcu++ ---------------------------------------------------------------------------------- # `${LIBCUDACXX_INCLUDE_DIR}/` specifies that the contents of this directory will be installed (not the directory itself) -file(INSTALL "${LIBCUDACXX_INCLUDE_DIR}/" DESTINATION "${CUDF_GENERATED_INCLUDE_DIR}/include/libcudacxx") -file(INSTALL "${LIBCXX_INCLUDE_DIR}" DESTINATION "${CUDF_GENERATED_INCLUDE_DIR}/include/libcxx") +file(COPY "${LIBCUDACXX_INCLUDE_DIR}/" DESTINATION "${CUDF_GENERATED_INCLUDE_DIR}/include/libcudacxx") +file(COPY "${LIBCXX_INCLUDE_DIR}" DESTINATION "${CUDF_GENERATED_INCLUDE_DIR}/include/libcxx") diff --git a/cpp/cmake/cudf-build-config.cmake.in b/cpp/cmake/cudf-build-config.cmake.in index 5f6b265384e..3f4d2e5586e 100644 --- a/cpp/cmake/cudf-build-config.cmake.in +++ b/cpp/cmake/cudf-build-config.cmake.in @@ -36,6 +36,8 @@ include(@CUDF_SOURCE_DIR@/cmake/thirdparty/CUDF_GetThrust.cmake) # find rmm set(CUDF_MIN_VERSION_rmm "${CUDF_VERSION_MAJOR}.${CUDF_VERSION_MINOR}") include(@CUDF_SOURCE_DIR@/cmake/thirdparty/CUDF_GetRMM.cmake) +# find gtest +include(@CUDF_SOURCE_DIR@/cmake/thirdparty/CUDF_GetGTest.cmake) # find arrow if(NOT EXISTS "${CMAKE_CURRENT_LIST_DIR}/cudf-arrow-targets.cmake") diff --git a/cpp/cmake/cudf-config.cmake.in b/cpp/cmake/cudf-config.cmake.in index aeb7d9915cf..1147e1160e7 100644 --- a/cpp/cmake/cudf-config.cmake.in +++ b/cpp/cmake/cudf-config.cmake.in @@ -19,13 +19,8 @@ find_dependency(Arrow @CUDF_VERSION_Arrow@) find_dependency(ArrowCUDA @CUDF_VERSION_Arrow@) find_dependency(Boost @CUDF_MIN_VERSION_Boost@) -find_dependency(jitify) find_dependency(rmm @CUDF_MIN_VERSION_rmm@) -find_dependency(Thrust @CUDF_MIN_VERSION_Thrust@) -find_dependency(dlpack @CUDF_MIN_VERSION_dlpack@) -find_dependency(libcudacxx @CUDF_MIN_VERSION_libcudacxx@) - -thrust_create_target(cudf::Thrust FROM_OPTIONS) +find_dependency(gtest @CUDF_MIN_VERSION_gtest@) list(POP_FRONT CMAKE_MODULE_PATH) diff --git a/cpp/cmake/thirdparty/CUDF_GetCPM.cmake b/cpp/cmake/thirdparty/CUDF_GetCPM.cmake index f50b9e7f646..5162aaf6ce7 100644 --- a/cpp/cmake/thirdparty/CUDF_GetCPM.cmake +++ b/cpp/cmake/thirdparty/CUDF_GetCPM.cmake @@ -17,3 +17,14 @@ if(NOT (EXISTS ${CPM_DOWNLOAD_LOCATION})) endif() include(${CPM_DOWNLOAD_LOCATION}) + +# If a target is installed, found by the `find_package` step of CPMFindPackage, +# and marked as IMPORTED, make it globally accessible to consumers of our libs. 
+function(fix_cmake_global_defaults target) + if(TARGET ${target}) + get_target_property(_is_imported ${target} IMPORTED) + if(_is_imported) + set_target_properties(${target} PROPERTIES IMPORTED_GLOBAL TRUE) + endif() + endif() +endfunction() diff --git a/cpp/cmake/thirdparty/CUDF_GetGTest.cmake b/cpp/cmake/thirdparty/CUDF_GetGTest.cmake new file mode 100644 index 00000000000..2911e4fce29 --- /dev/null +++ b/cpp/cmake/thirdparty/CUDF_GetGTest.cmake @@ -0,0 +1,53 @@ +#============================================================================= +# Copyright (c) 2021, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +#============================================================================= + +function(find_and_configure_gtest VERSION) + # Find or install GoogleTest + CPMFindPackage(NAME GTest + VERSION ${VERSION} + GIT_REPOSITORY https://github.com/google/googletest.git + GIT_TAG release-${VERSION} + GIT_SHALLOW TRUE + OPTIONS "INSTALL_GTEST OFF" + # googletest >= 1.10.0 provides a cmake config file -- use it if it exists + FIND_PACKAGE_ARGUMENTS "CONFIG") + # Add GTest aliases if they don't already exist. + # Assumes if GTest::gtest doesn't exist, the others don't either. + # TODO: Is this always a valid assumption? + if(NOT TARGET GTest::gtest) + add_library(GTest::gtest ALIAS gtest) + add_library(GTest::gmock ALIAS gmock) + add_library(GTest::gtest_main ALIAS gtest_main) + add_library(GTest::gmock_main ALIAS gmock_main) + endif() + # Make sure consumers of cudf can also see GTest::* targets + fix_cmake_global_defaults(GTest::gtest) + fix_cmake_global_defaults(GTest::gmock) + fix_cmake_global_defaults(GTest::gtest_main) + fix_cmake_global_defaults(GTest::gmock_main) + if(GTest_ADDED) + install(TARGETS gmock + gtest + gmock_main + gtest_main + DESTINATION lib + EXPORT cudf-targets) + endif() +endfunction() + +set(CUDF_MIN_VERSION_gtest 1.10.0) + +find_and_configure_gtest(${CUDF_MIN_VERSION_gtest}) diff --git a/cpp/cmake/thirdparty/CUDF_GetRMM.cmake b/cpp/cmake/thirdparty/CUDF_GetRMM.cmake index 16c8a2b39f4..54e0a8620c5 100644 --- a/cpp/cmake/thirdparty/CUDF_GetRMM.cmake +++ b/cpp/cmake/thirdparty/CUDF_GetRMM.cmake @@ -48,13 +48,9 @@ function(find_and_configure_rmm VERSION) cudf_restore_if_enabled(BUILD_TESTS) cudf_restore_if_enabled(BUILD_BENCHMARKS) - #Make sure consumers of cudf can also see rmm::rmm - if(TARGET rmm::rmm) - get_target_property(rmm_is_imported rmm::rmm IMPORTED) - if(rmm_is_imported) - set_target_properties(rmm::rmm PROPERTIES IMPORTED_GLOBAL TRUE) - endif() - endif() + # Make sure consumers of cudf can also see rmm::rmm + fix_cmake_global_defaults(rmm::rmm) + if(NOT rmm_BINARY_DIR IN_LIST CMAKE_PREFIX_PATH) list(APPEND CMAKE_PREFIX_PATH "${rmm_BINARY_DIR}") set(CMAKE_PREFIX_PATH ${CMAKE_PREFIX_PATH} PARENT_SCOPE) diff --git a/cpp/include/cudf/io/data_sink.hpp b/cpp/include/cudf/io/data_sink.hpp index 0ae403458a0..e0eb60af070 100644 --- a/cpp/include/cudf/io/data_sink.hpp +++ b/cpp/include/cudf/io/data_sink.hpp @@ -1,5 +1,5 @@ /* - * Copyright 
(c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -107,23 +107,35 @@ class data_sink { */ virtual bool supports_device_write() const { return false; } + /** + * @brief Estimates whether a direct device write would be more optimal for the given size. + * + * @param size Number of bytes to write + * @return whether the device write is expected to be more performant for the given size + */ + virtual bool is_device_write_preferred(size_t size) const { return supports_device_write(); } + /** * @brief Append the buffer content to the sink from a gpu address * - * @param[in] data Pointer to the buffer to be written into the sink object - * @param[in] size Number of bytes to write + * For optimal performance, should only be called when `is_device_write_preferred` returns `true`. + * Data sink implementations that don't support direct device writes don't need to override + * this function. * - * @return void + * @throws cudf::logic_error the object does not support direct device writes, i.e. + * `supports_device_write` returns `false`. + * + * @param gpu_data Pointer to the buffer to be written into the sink object + * @param size Number of bytes to write + * @param stream CUDA stream to use */ virtual void device_write(void const* gpu_data, size_t size, rmm::cuda_stream_view stream) { - CUDF_FAIL("data_sink classes that support device_write must override this function."); + CUDF_FAIL("data_sink classes that support device_write must override it."); } /** * @brief Flush the data written into the sink - * - * @return void */ virtual void flush() = 0; diff --git a/cpp/include/cudf/io/datasource.hpp b/cpp/include/cudf/io/datasource.hpp index 88f2bd187e2..8fcc045e6d2 100644 --- a/cpp/include/cudf/io/datasource.hpp +++ b/cpp/include/cudf/io/datasource.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -19,6 +19,8 @@ #include #include +#include + #include #include #include @@ -50,12 +52,15 @@ class datasource { /** * @brief Returns the address of the data in the buffer. */ - virtual const uint8_t* data() const = 0; + virtual uint8_t const* data() const = 0; /** * @brief Base class destructor */ virtual ~buffer() {} + + template + static std::unique_ptr create(Container&& data_owner); }; /** @@ -147,37 +152,57 @@ class datasource { */ virtual bool supports_device_read() const { return false; } + /** + * @brief Estimates whether a direct device read would be more optimal for the given size. + * + * @param size Number of bytes to read + * @return whether the device read is expected to be more performant for the given size + */ + virtual bool is_device_read_preferred(size_t size) const { return supports_device_read(); } + /** * @brief Returns a device buffer with a subset of data from the source. * + * For optimal performance, should only be called when `is_device_read_preferred` returns `true`. * Data source implementations that don't support direct device reads don't need to override this * function. * - * @param[in] offset Bytes from the start - * @param[in] size Bytes to read + * @throws cudf::logic_error the object does not support direct device reads, i.e. + * `supports_device_read` returns `false`. 
+ * + * @param offset Number of bytes from the start + * @param size Number of bytes to read + * @param stream CUDA stream to use * * @return The data buffer in the device memory */ - virtual std::unique_ptr device_read(size_t offset, size_t size) + virtual std::unique_ptr device_read(size_t offset, + size_t size, + rmm::cuda_stream_view stream) { - CUDF_FAIL("datasource classes that support device_read must override this function."); + CUDF_FAIL("datasource classes that support device_read must override it."); } /** * @brief Reads a selected range into a preallocated device buffer * + * For optimal performance, should only be called when `is_device_read_preferred` returns `true`. * Data source implementations that don't support direct device reads don't need to override this * function. * - * @param[in] offset Bytes from the start - * @param[in] size Bytes to read - * @param[in] dst Address of the existing device memory + * @throws cudf::logic_error when the object does not support direct device reads, i.e. + * `supports_device_read` returns `false`. + * + * @param offset Number of bytes from the start + * @param size Number of bytes to read + * @param dst Address of the existing device memory + * @param stream CUDA stream to use * * @return The number of bytes read (can be smaller than size) */ - virtual size_t device_read(size_t offset, size_t size, uint8_t* dst) + virtual size_t device_read(size_t offset, size_t size, uint8_t* dst, rmm::cuda_stream_view stream) { - CUDF_FAIL("datasource classes that support device_read must override this function."); + CUDF_FAIL("datasource classes that support device_read must override it."); } /** @@ -205,14 +230,57 @@ class datasource { size_t size() const override { return _size; } - const uint8_t* data() const override { return _data; } + uint8_t const* data() const override { return _data; } private: uint8_t* const _data; size_t const _size; }; + + /** + * @brief Derived implementation of `buffer` that owns the data. + * + * Can use different container types to hold the data buffer. + * + * @tparam Container Type of the container object that owns the data + */ + template + class owning_buffer : public buffer { + public: + /** + * @brief Moves the input container into the newly created object. + */ + owning_buffer(Container&& data_owner) + : _data(std::move(data_owner)), _data_ptr(_data.data()), _size(_data.size()) + { + } + + /** + * @brief Moves the input container into the newly created object, and exposes a subspan of the + * buffer. + */ + owning_buffer(Container&& data_owner, uint8_t const* data_ptr, size_t size) + : _data(std::move(data_owner)), _data_ptr(data_ptr), _size(size) + { + } + + size_t size() const override { return _size; } + + uint8_t const* data() const override { return static_cast(_data_ptr); } + + private: + Container _data; + void const* _data_ptr; + size_t _size; + }; }; +template +std::unique_ptr datasource::buffer::create(Container&& data_owner) +{ + return std::make_unique>(std::move(data_owner)); +} + /** * @brief Implementation class for reading from an Apache Arrow file. The file * could be a memory-mapped file or other implementation supported by Arrow. 
@@ -230,7 +298,7 @@ class arrow_io_source : public datasource { { } size_t size() const override { return arrow_buffer->size(); } - const uint8_t* data() const override { return arrow_buffer->data(); } + uint8_t const* data() const override { return arrow_buffer->data(); } }; public: diff --git a/cpp/include/cudf/lists/explode.hpp b/cpp/include/cudf/lists/explode.hpp new file mode 100644 index 00000000000..156d4b9275d --- /dev/null +++ b/cpp/include/cudf/lists/explode.hpp @@ -0,0 +1,200 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include + +namespace cudf { + +/** + * @brief Explodes a list column's elements. + * + * Any list is exploded, which means the elements of the list in each row are expanded into new rows + * in the output. The corresponding rows for other columns in the input are duplicated. Example: + * ``` + * [[5,10,15], 100], + * [[20,25], 200], + * [[30], 300], + * returns + * [5, 100], + * [10, 100], + * [15, 100], + * [20, 200], + * [25, 200], + * [30, 300], + * ``` + * + * Nulls and empty lists propagate in different ways depending on what is null or empty. + *``` + * [[5,null,15], 100], + * [null, 200], + * [[], 300], + * returns + * [5, 100], + * [null, 100], + * [15, 100], + * ``` + * Note that null lists are not included in the resulting table, but nulls inside + * lists and empty lists will be represented with a null entry for that column in that row. + * + * @param input_table Table to explode. + * @param explode_column_idx Column index to explode inside the table. + * @param mr Device memory resource used to allocate the returned column's device memory. + * + * @return A new table with explode_col exploded. + */ +std::unique_ptr explode( + table_view const& input_table, + size_type explode_column_idx, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +/** + * @brief Explodes a list column's elements and includes a position column. + * + * Any list is exploded, which means the elements of the list in each row are expanded into new rows + * in the output. The corresponding rows for other columns in the input are duplicated. A position + * column is added that has the index inside the original list for each row. Example: + * ``` + * [[5,10,15], 100], + * [[20,25], 200], + * [[30], 300], + * returns + * [0, 5, 100], + * [1, 10, 100], + * [2, 15, 100], + * [0, 20, 200], + * [1, 25, 200], + * [0, 30, 300], + * ``` + * + * Nulls and empty lists propagate in different ways depending on what is null or empty. + *``` + * [[5,null,15], 100], + * [null, 200], + * [[], 300], + * returns + * [0, 5, 100], + * [1, null, 100], + * [2, 15, 100], + * ``` + * Note that null lists are not included in the resulting table, but nulls inside + * lists and empty lists will be represented with a null entry for that column in that row. + * + * @param input_table Table to explode. 
+ * @param explode_column_idx Column index to explode inside the table.
+ * @param mr Device memory resource used to allocate the returned column's device memory.
+ *
+ * @return A new table with exploded value and position. The column order of the returned table is
+ *         [cols before explode_input, explode_position, explode_value, cols after explode_input].
+ */
+std::unique_ptr<table> explode_position(
+  table_view const& input_table,
+  size_type explode_column_idx,
+  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+
+/**
+ * @brief Explodes a list column's elements retaining any null entries or empty lists inside.
+ *
+ * Any list is exploded, which means the elements of the list in each row are expanded into new rows
+ * in the output. The corresponding rows for other columns in the input are duplicated. Example:
+ * ```
+ * [[5,10,15], 100],
+ * [[20,25], 200],
+ * [[30], 300],
+ * returns
+ * [5, 100],
+ * [10, 100],
+ * [15, 100],
+ * [20, 200],
+ * [25, 200],
+ * [30, 300],
+ * ```
+ *
+ * Nulls and empty lists propagate as null entries in the result.
+ * ```
+ * [[5,null,15], 100],
+ * [null, 200],
+ * [[], 300],
+ * returns
+ * [5, 100],
+ * [null, 100],
+ * [15, 100],
+ * [null, 200],
+ * [null, 300],
+ * ```
+ *
+ * @param input_table Table to explode.
+ * @param explode_column_idx Column index to explode inside the table.
+ * @param mr Device memory resource used to allocate the returned column's device memory.
+ *
+ * @return A new table with explode_col exploded.
+ */
+std::unique_ptr<table> explode_outer(
+  table_view const& input_table,
+  size_type explode_column_idx,
+  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+
+/**
+ * @brief Explodes a list column's elements retaining any null entries or empty lists and includes a
+ * position column.
+ *
+ * Any list is exploded, which means the elements of the list in each row are expanded into new rows
+ * in the output. The corresponding rows for other columns in the input are duplicated. A position
+ * column is added that has the index inside the original list for each row. Example:
+ * ```
+ * [[5,10,15], 100],
+ * [[20,25], 200],
+ * [[30], 300],
+ * returns
+ * [0, 5, 100],
+ * [1, 10, 100],
+ * [2, 15, 100],
+ * [0, 20, 200],
+ * [1, 25, 200],
+ * [0, 30, 300],
+ * ```
+ *
+ * Nulls and empty lists propagate as null entries in the result.
+ * ```
+ * [[5,null,15], 100],
+ * [null, 200],
+ * [[], 300],
+ * returns
+ * [0, 5, 100],
+ * [1, null, 100],
+ * [2, 15, 100],
+ * [0, null, 200],
+ * [0, null, 300],
+ * ```
+ *
+ * @param input_table Table to explode.
+ * @param explode_column_idx Column index to explode inside the table.
+ * @param mr Device memory resource used to allocate the returned column's device memory.
+ *
+ * @return A new table with explode_col exploded.
+ */
+std::unique_ptr<table>
explode_outer_position( + table_view const& input_table, + size_type explode_column_idx, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +/** @} */ // end of group + +} // namespace cudf diff --git a/cpp/include/cudf/reshape.hpp b/cpp/include/cudf/reshape.hpp index a6030f31e6d..74e4ebb8d05 100644 --- a/cpp/include/cudf/reshape.hpp +++ b/cpp/include/cudf/reshape.hpp @@ -97,92 +97,6 @@ std::unique_ptr byte_cast( flip_endianness endian_configuration, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); -/** - * @brief Explodes a list column's elements. - * - * Any list is exploded, which means the elements of the list in each row are expanded into new rows - * in the output. The corresponding rows for other columns in the input are duplicated. Example: - * ``` - * [[5,10,15], 100], - * [[20,25], 200], - * [[30], 300], - * returns - * [5, 100], - * [10, 100], - * [15, 100], - * [20, 200], - * [25, 200], - * [30, 300], - * ``` - * - * Nulls and empty lists propagate in different ways depending on what is null or empty. - *``` - * [[5,null,15], 100], - * [null, 200], - * [[], 300], - * returns - * [5, 100], - * [null, 100], - * [15, 100], - * ``` - * Note that null lists are not included in the resulting table, but nulls inside - * lists and empty lists will be represented with a null entry for that column in that row. - * - * @param input_table Table to explode. - * @param explode_column_idx Column index to explode inside the table. - * @param mr Device memory resource used to allocate the returned column's device memory. - * - * @return A new table with explode_col exploded. - */ -std::unique_ptr
explode( - table_view const& input_table, - size_type explode_column_idx, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); - -/** - * @brief Explodes a list column's elements and includes a position column. - * - * Any list is exploded, which means the elements of the list in each row are expanded into new rows - * in the output. The corresponding rows for other columns in the input are duplicated. A position - * column is added that has the index inside the original list for each row. Example: - * ``` - * [[5,10,15], 100], - * [[20,25], 200], - * [[30], 300], - * returns - * [0, 5, 100], - * [1, 10, 100], - * [2, 15, 100], - * [0, 20, 200], - * [1, 25, 200], - * [0, 30, 300], - * ``` - * - * Nulls and empty lists propagate in different ways depending on what is null or empty. - *``` - * [[5,null,15], 100], - * [null, 200], - * [[], 300], - * returns - * [0, 5, 100], - * [1, null, 100], - * [2, 15, 100], - * ``` - * Note that null lists are not included in the resulting table, but nulls inside - * lists and empty lists will be represented with a null entry for that column in that row. - * - * @param input_table Table to explode. - * @param explode_column_idx Column index to explode inside the table. - * @param mr Device memory resource used to allocate the returned column's device memory. - * - * @return A new table with exploded value and position. The column order of return table is - * [cols before explode_input, explode_position, explode_value, cols after explode_input]. - */ -std::unique_ptr
explode_position( - table_view const& input_table, - size_type explode_column_idx, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); - /** @} */ // end of group } // namespace cudf diff --git a/cpp/include/cudf/strings/char_types/char_types.hpp b/cpp/include/cudf/strings/char_types/char_types.hpp index 300722920f4..1f5b6241850 100644 --- a/cpp/include/cudf/strings/char_types/char_types.hpp +++ b/cpp/include/cudf/strings/char_types/char_types.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -146,82 +146,6 @@ std::unique_ptr filter_characters_of_type( string_character_types types_to_keep = string_character_types::ALL_TYPES, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); -/** - * @brief Returns a boolean column identifying strings in which all - * characters are valid for conversion to integers. - * - * The output row entry will be set to `true` if the corresponding string element - * has at least one character in [-+0-9]. - * - * @code{.pseudo} - * Example: - * s = ['123', '-456', '', 'A', '+7'] - * b = s.is_integer(s) - * b is [true, true, false, false, true] - * @endcode - * - * Any null row results in a null entry for that row in the output column. - * - * @param strings Strings instance for this operation. - * @param mr Device memory resource used to allocate the returned column's device memory. - * @return New column of boolean results for each string. - */ -std::unique_ptr is_integer( - strings_column_view const& strings, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); - -/** - * @brief Returns `true` if all strings contain - * characters that are valid for conversion to integers. - * - * This function will return `true` if all string elements - * has at least one character in [-+0-9]. - * - * Any null entry or empty string will cause this function to return `false`. - * - * @param strings Strings instance for this operation. - * @return true if all string are valid - */ -bool all_integer(strings_column_view const& strings); - -/** - * @brief Returns a boolean column identifying strings in which all - * characters are valid for conversion to floats. - * - * The output row entry will be set to `true` if the corresponding string element - * has at least one character in [-+0-9eE.]. - * - * @code{.pseudo} - * Example: - * s = ['123', '-456', '', 'A', '+7', '8.9' '3.7e+5'] - * b = s.is_float(s) - * b is [true, true, false, false, true, true, true] - * @endcode - * - * Any null row results in a null entry for that row in the output column. - * - * @param strings Strings instance for this operation. - * @param mr Device memory resource used to allocate the returned column's device memory. - * @return New column of boolean results for each string. - */ -std::unique_ptr is_float( - strings_column_view const& strings, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); - -/** - * @brief Returns `true` if all strings contain - * characters that are valid for conversion to floats. - * - * This function will return `true` if all string elements - * has at least one character in [-+0-9eE.]. - * - * Any null entry or empty string will cause this function to return `false`. - * - * @param strings Strings instance for this operation. 
- * @return true if all string are valid - */ -bool all_float(strings_column_view const& strings); - /** @} */ // end of doxygen group } // namespace strings } // namespace cudf diff --git a/cpp/include/cudf/strings/convert/convert_floats.hpp b/cpp/include/cudf/strings/convert/convert_floats.hpp index cb4746dbf40..d1e00b36f6f 100644 --- a/cpp/include/cudf/strings/convert/convert_floats.hpp +++ b/cpp/include/cudf/strings/convert/convert_floats.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, NVIDIA CORPORATION. + * Copyright (c) 2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -68,6 +68,30 @@ std::unique_ptr from_floats( column_view const& floats, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +/** + * @brief Returns a boolean column identifying strings in which all + * characters are valid for conversion to floats. + * + * The output row entry will be set to `true` if the corresponding string element + * has at least one character in [-+0-9eE.]. + * + * @code{.pseudo} + * Example: + * s = ['123', '-456', '', 'A', '+7', '8.9' '3.7e+5'] + * b = s.is_float(s) + * b is [true, true, false, false, true, true, true] + * @endcode + * + * Any null row results in a null entry for that row in the output column. + * + * @param strings Strings instance for this operation. + * @param mr Device memory resource used to allocate the returned column's device memory. + * @return New column of boolean results for each string. + */ +std::unique_ptr is_float( + strings_column_view const& strings, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + /** @} */ // end of doxygen group } // namespace strings } // namespace cudf diff --git a/cpp/include/cudf/strings/convert/convert_integers.hpp b/cpp/include/cudf/strings/convert/convert_integers.hpp index 8f42deb380d..1e2fa80b129 100644 --- a/cpp/include/cudf/strings/convert/convert_integers.hpp +++ b/cpp/include/cudf/strings/convert/convert_integers.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, NVIDIA CORPORATION. + * Copyright (c) 2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -73,6 +73,30 @@ std::unique_ptr from_integers( column_view const& integers, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +/** + * @brief Returns a boolean column identifying strings in which all + * characters are valid for conversion to integers. + * + * The output row entry will be set to `true` if the corresponding string element + * has at least one character in [-+0-9]. + * + * @code{.pseudo} + * Example: + * s = ['123', '-456', '', 'A', '+7'] + * b = s.is_integer(s) + * b is [true, true, false, false, true] + * @endcode + * + * Any null row results in a null entry for that row in the output column. + * + * @param strings Strings instance for this operation. + * @param mr Device memory resource used to allocate the returned column's device memory. + * @return New column of boolean results for each string. + */ +std::unique_ptr is_integer( + strings_column_view const& strings, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + /** * @brief Returns a new integer numeric column parsing hexadecimal values from the * provided strings column. 
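The three header hunks above relocate the string-to-number validity checks from char_types.hpp into the convert_* headers. As a rough illustration (not part of this patch), a caller would now include those headers directly; the helper function name below is hypothetical, while the `is_integer`/`is_float` signatures match the declarations added above.

```cpp
// Illustrative sketch only -- not part of this patch. After this change, callers
// include the convert_* headers (rather than char_types.hpp) for these checks.
#include <cudf/column/column.hpp>
#include <cudf/strings/convert/convert_floats.hpp>
#include <cudf/strings/convert/convert_integers.hpp>
#include <cudf/strings/strings_column_view.hpp>

#include <memory>
#include <utility>

// Hypothetical helper: returns boolean columns flagging which rows of `input` are
// candidates for integer and float conversion (see the doxygen examples above).
std::pair<std::unique_ptr<cudf::column>, std::unique_ptr<cudf::column>>
classify_numeric_strings(cudf::strings_column_view const& input)
{
  return {cudf::strings::is_integer(input), cudf::strings::is_float(input)};
}
```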
diff --git a/cpp/include/cudf/strings/detail/gather.cuh b/cpp/include/cudf/strings/detail/gather.cuh index 28da8ef4324..988fa552100 100644 --- a/cpp/include/cudf/strings/detail/gather.cuh +++ b/cpp/include/cudf/strings/detail/gather.cuh @@ -31,15 +31,60 @@ #include namespace cudf { +namespace strings { +namespace detail { -template -constexpr inline bool is_signed_iterator() +/** + * @brief Returns a new chars column using the specified indices to select + * strings from the input iterator. + * + * This uses a character-parallel gather CUDA kernel that performs very + * well on a strings column with long strings (e.g. average > 64 bytes). + * + * @tparam StringIterator Iterator should produce `string_view` objects. + * @tparam MapIterator Iterator for retrieving integer indices of the `StringIterator`. + * + * @param strings_begin Start of the iterator to retrieve `string_view` instances + * @param map_begin Start of index iterator. + * @param map_end End of index iterator. + * @param offsets The offset values to be associated with the output chars column. + * @param chars_bytes The total number of bytes for the output chars column. + * @param mr Device memory resource used to allocate the returned column's device memory. + * @param stream CUDA stream used for device memory operations and kernel launches. + * @return New chars column fit for a strings column. + */ +template +std::unique_ptr gather_chars(StringIterator strings_begin, + MapIterator map_begin, + MapIterator map_end, + cudf::device_span const offsets, + size_type chars_bytes, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { - return std::is_signed::value_type>::value; -} + auto const output_count = std::distance(map_begin, map_end); + if (output_count == 0) return make_empty_column(data_type{type_id::INT8}); -namespace strings { -namespace detail { + auto chars_column = create_chars_child_column(output_count, 0, chars_bytes, stream, mr); + auto const d_chars = chars_column->mutable_view().template data(); + + auto gather_chars_fn = [strings_begin, map_begin, offsets] __device__(size_type out_idx) -> char { + auto const out_row = + thrust::prev(thrust::upper_bound(thrust::seq, offsets.begin(), offsets.end(), out_idx)); + auto const row_idx = map_begin[thrust::distance(offsets.begin(), out_row)]; // get row index + auto const d_str = strings_begin[row_idx]; // get row's string + auto const offset = out_idx - *out_row; // get string's char + return d_str.data()[offset]; + }; + + thrust::transform(rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(chars_bytes), + d_chars, + gather_chars_fn); + + return chars_column; +} /** * @brief Returns a new strings column using the specified indices to select @@ -107,29 +152,15 @@ std::unique_ptr gather( rmm::exec_policy(stream), d_out_offsets, d_out_offsets + output_count + 1, d_out_offsets); // build chars column - size_type const out_chars_bytes = static_cast(total_bytes); - auto out_chars_column = create_chars_child_column(output_count, 0, out_chars_bytes, stream, mr); - auto const d_out_chars = out_chars_column->mutable_view().template data(); - - // fill in chars cudf::device_span const d_out_offsets_span(d_out_offsets, output_count + 1); - auto const d_in_chars = (strings_count > 0) ? 
strings.chars().data() : nullptr; - auto gather_chars_fn = - [d_out_offsets_span, begin, d_in_offsets, d_in_chars] __device__(size_type out_char_idx) { - // find output row index for this output char index - auto const next_row_ptr = thrust::upper_bound( - thrust::seq, d_out_offsets_span.begin(), d_out_offsets_span.end(), out_char_idx); - auto const out_row_idx = thrust::distance(d_out_offsets_span.begin(), next_row_ptr) - 1; - auto const str_char_offset = out_char_idx - d_out_offsets_span[out_row_idx]; - auto const in_row_idx = begin[out_row_idx]; - auto const in_char_offset = d_in_offsets[in_row_idx] + str_char_offset; - return d_in_chars[in_char_offset]; - }; - thrust::transform(rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(out_chars_bytes), - d_out_chars, - gather_chars_fn); + auto const d_strings = column_device_view::create(strings.parent(), stream); + auto out_chars_column = gather_chars(d_strings->begin(), + begin, + end, + d_out_offsets_span, + static_cast(total_bytes), + stream, + mr); return make_strings_column(output_count, std::move(out_offsets_column), diff --git a/cpp/include/cudf/strings/detail/strings_column_factories.cuh b/cpp/include/cudf/strings/detail/strings_column_factories.cuh index 8e843c555c5..932f7eb0926 100644 --- a/cpp/include/cudf/strings/detail/strings_column_factories.cuh +++ b/cpp/include/cudf/strings/detail/strings_column_factories.cuh @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -27,6 +28,7 @@ #include #include +#include #include #include @@ -34,7 +36,27 @@ namespace cudf { namespace strings { namespace detail { -// Create a strings-type column from iterators of pointer/size pairs +/** + * @brief Average string byte-length threshold for deciding character-level + * vs. row-level parallel algorithm. + * + * This value was determined by running the factory_benchmark against different + * string lengths and observing the point where the performance is faster for + * long strings. + */ +constexpr size_type FACTORY_BYTES_PER_ROW_THRESHOLD = 64; + +/** + * @brief Create a strings-type column from iterators of pointer/size pairs + * + * @tparam IndexPairIterator iterator over type `pair` values + * + * @param begin First string row (inclusive) + * @param end Last string row (exclusive) + * @param stream CUDA stream used for device memory operations + * @param mr Device memory resource used to allocate the returned column's device memory + * @return New strings column + */ template std::unique_ptr make_strings_column(IndexPairIterator begin, IndexPairIterator end, @@ -51,7 +73,7 @@ std::unique_ptr make_strings_column(IndexPairIterator begin, auto size_checker = [] __device__(string_index_pair const& item) { return (item.first != nullptr) ? 
item.second : 0; }; - size_t bytes = thrust::transform_reduce( + size_t const bytes = thrust::transform_reduce( rmm::exec_policy(stream), begin, end, size_checker, 0, thrust::plus()); CUDF_EXPECTS(bytes < static_cast(std::numeric_limits::max()), "total size of strings is too large for cudf column"); @@ -65,26 +87,49 @@ std::unique_ptr make_strings_column(IndexPairIterator begin, offsets_transformer_itr, offsets_transformer_itr + strings_count, stream, mr); // create null mask - auto validator = [] __device__(string_index_pair const item) { return item.first != nullptr; }; - auto new_nulls = cudf::detail::valid_if(begin, end, validator, stream, mr); - auto null_count = new_nulls.second; + auto validator = [] __device__(string_index_pair const item) { return item.first != nullptr; }; + auto new_nulls = cudf::detail::valid_if(begin, end, validator, stream, mr); + auto const null_count = new_nulls.second; auto null_mask = (null_count > 0) ? std::move(new_nulls.first) : rmm::device_buffer{0, stream, mr}; // build chars column - auto chars_column = - strings::detail::create_chars_child_column(strings_count, null_count, bytes, stream, mr); - auto d_chars = chars_column->mutable_view().template data(); - auto copy_chars = [d_chars] __device__(auto item) { - string_index_pair str = thrust::get<0>(item); - size_type offset = thrust::get<1>(item); - if (str.first != nullptr) memcpy(d_chars + offset, str.first, str.second); - }; - thrust::for_each_n(rmm::exec_policy(stream), - thrust::make_zip_iterator( - thrust::make_tuple(begin, offsets_column->view().template begin())), - strings_count, - copy_chars); + std::unique_ptr chars_column = [&] { + // use a character-parallel kernel for long string lengths + auto const avg_bytes_per_row = bytes / std::max(strings_count - null_count, 1); + if (avg_bytes_per_row > FACTORY_BYTES_PER_ROW_THRESHOLD) { + auto const d_offsets = + device_span{offsets_column->view().template data(), + static_cast(offsets_column->size())}; + auto const str_begin = thrust::make_transform_iterator(begin, [] __device__(auto ip) { + return string_view{ip.first, ip.second}; + }); + + return gather_chars(str_begin, + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(strings_count), + d_offsets, + static_cast(bytes), + stream, + mr); + } else { + // this approach is 2-3x faster for a large number of smaller string lengths + auto chars_column = + strings::detail::create_chars_child_column(strings_count, null_count, bytes, stream, mr); + auto d_chars = chars_column->mutable_view().template data(); + auto copy_chars = [d_chars] __device__(auto item) { + string_index_pair const str = thrust::get<0>(item); + size_type const offset = thrust::get<1>(item); + if (str.first != nullptr) memcpy(d_chars + offset, str.first, str.second); + }; + thrust::for_each_n(rmm::exec_policy(stream), + thrust::make_zip_iterator(thrust::make_tuple( + begin, offsets_column->view().template begin())), + strings_count, + copy_chars); + return chars_column; + } + }(); return make_strings_column(strings_count, std::move(offsets_column), @@ -95,7 +140,22 @@ std::unique_ptr make_strings_column(IndexPairIterator begin, mr); } -// Create a strings-type column from iterators to chars, offsets, and bitmask. +/** + * @brief Create a strings-type column from iterators to chars, offsets, and bitmask. 
+ * + * @tparam CharIterator iterator over character bytes (int8) + * @tparam OffsetIterator iterator over offset values (size_type) + * + * @param chars_begin First character byte (inclusive) + * @param chars_end Last character byte (exclusive) + * @param offset_begin First offset value (inclusive) + * @param offset_end Last offset value (exclusive) + * @param null_count Number of null rows + * @param null_mask The validity bitmask in Arrow format + * @param stream CUDA stream used for device memory operations + * @param mr Device memory resource used to allocate the returned column's device memory + * @return New strings column + */ template std::unique_ptr make_strings_column(CharIterator chars_begin, CharIterator chars_end, diff --git a/cpp/include/cudf/table/table.hpp b/cpp/include/cudf/table/table.hpp index 553cf5e9096..4571362076c 100644 --- a/cpp/include/cudf/table/table.hpp +++ b/cpp/include/cudf/table/table.hpp @@ -110,6 +110,27 @@ class table { */ std::vector> release(); + /** + * @brief Returns a table_view built from a range of column indices. + * + * @throws std::out_of_range + * If any index is outside [0, num_columns()) + * + * @param begin Beginning of the range + * @param end Ending of the range + * @return A table_view consisting of columns from the original table + * specified by the elements of `column_indices` + */ + + template + table_view select(InputIterator begin, InputIterator end) const + { + std::vector columns(std::distance(begin, end)); + std::transform( + begin, end, columns.begin(), [this](auto index) { return _columns.at(index)->view(); }); + return table_view(columns); + } + /** * @brief Returns a table_view with set of specified columns. * @@ -120,7 +141,10 @@ class table { * @return A table_view consisting of columns from the original table * specified by the elements of `column_indices` */ - table_view select(std::vector const& column_indices) const; + table_view select(std::vector const& column_indices) const + { + return select(column_indices.begin(), column_indices.end()); + }; /** * @brief Returns a reference to the specified column diff --git a/cpp/include/cudf/table/table_view.hpp b/cpp/include/cudf/table/table_view.hpp index 22f2073f73c..083366cc310 100644 --- a/cpp/include/cudf/table/table_view.hpp +++ b/cpp/include/cudf/table/table_view.hpp @@ -174,6 +174,25 @@ class table_view : public detail::table_view_base { */ table_view(std::vector const& views); + /** + * @brief Returns a table_view built from a range of column indices. + * + * @throws std::out_of_range + * If any index is outside [0, num_columns()) + * + * @param begin Beginning of the range + * @param end Ending of the range + * @return A table_view consisting of columns from the original table + * specified by the elements of `column_indices` + */ + template + table_view select(InputIterator begin, InputIterator end) const + { + std::vector columns(std::distance(begin, end)); + std::transform(begin, end, columns.begin(), [this](auto index) { return this->column(index); }); + return table_view(columns); + } + /** * @brief Returns a table_view with set of specified columns. 
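// [Editor's note: illustrative sketch, not part of this patch.] The new
// iterator-range select() overload accepts any range of column indices, so a
// caller can skip materializing a std::vector of indices first. The helper name
// below is hypothetical:
#include <cudf/table/table.hpp>
#include <thrust/iterator/counting_iterator.h>

cudf::table_view first_n_columns(cudf::table const& tbl, cudf::size_type n)
{
  // Select columns [0, n) straight from a counting-iterator range.
  return tbl.select(thrust::make_counting_iterator(0), thrust::make_counting_iterator(n));
}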
* diff --git a/cpp/include/cudf/utilities/traits.hpp b/cpp/include/cudf/utilities/traits.hpp index e045476ea77..1e0d45d081d 100644 --- a/cpp/include/cudf/utilities/traits.hpp +++ b/cpp/include/cudf/utilities/traits.hpp @@ -224,6 +224,18 @@ constexpr inline bool is_unsigned(data_type type) return cudf::type_dispatcher(type, is_unsigned_impl{}); } +/** + * @brief Indicates whether the `Iterator` value type is unsigned. + * + * @tparam Iterator The type to verify + * @return true if the iterator's value type is unsigned + */ +template +constexpr inline bool is_signed_iterator() +{ + return std::is_signed::value_type>::value; +} + /** * @brief Indicates whether the type `T` is a floating point type. * diff --git a/cpp/src/io/csv/writer_impl.cu b/cpp/src/io/csv/writer_impl.cu index dda2e0704f6..f7e153d71f4 100644 --- a/cpp/src/io/csv/writer_impl.cu +++ b/cpp/src/io/csv/writer_impl.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -416,36 +416,28 @@ void writer::impl::write_chunked(strings_column_view const& str_column_view, auto total_num_bytes = strings_column.chars_size(); char const* ptr_all_bytes = strings_column.chars().data(); - if (out_sink_->supports_device_write()) { - // host algorithm call, but the underlying call - // is a device_write taking a device buffer; - // + if (out_sink_->is_device_write_preferred(total_num_bytes)) { + // Direct write from device memory out_sink_->device_write(ptr_all_bytes, total_num_bytes, stream); - out_sink_->device_write(newline.data(), - newline.size(), - stream); // needs newline at the end, to separate from next chunk } else { - // no device write possible; - // - // copy the bytes to host, too: - // + // copy the bytes to host to write them out thrust::host_vector h_bytes(total_num_bytes); CUDA_TRY(cudaMemcpyAsync(h_bytes.data(), ptr_all_bytes, total_num_bytes * sizeof(char), cudaMemcpyDeviceToHost, stream.value())); - stream.synchronize(); - // host algorithm call, where the underlying call - // is also host_write taking a host buffer; - // - char const* ptr_h_bytes = h_bytes.data(); - out_sink_->host_write(ptr_h_bytes, total_num_bytes); + out_sink_->host_write(h_bytes.data(), total_num_bytes); + } + + // Needs newline at the end, to separate from next chunk + if (out_sink_->is_device_write_preferred(newline.size())) { + out_sink_->device_write(newline.data(), newline.size(), stream); + } else { out_sink_->host_write(options_.get_line_terminator().data(), - options_.get_line_terminator() - .size()); // needs newline at the end, to separate from next chunk + options_.get_line_terminator().size()); } } diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu index 80935e3fbd5..61adef26dab 100644 --- a/cpp/src/io/orc/reader_impl.cu +++ b/cpp/src/io/orc/reader_impl.cu @@ -139,7 +139,7 @@ struct orc_stream_info { } uint64_t offset; // offset in file size_t dst_pos; // offset in memory relative to start of compressed stripe data - uint32_t length; // length in file + size_t length; // length in file uint32_t gdf_idx; // column index uint32_t stripe_idx; // stripe index }; diff --git a/cpp/src/io/orc/stripe_data.cu b/cpp/src/io/orc/stripe_data.cu index 4bca725a16b..1ff752034ad 100644 --- a/cpp/src/io/orc/stripe_data.cu +++ b/cpp/src/io/orc/stripe_data.cu @@ -1455,8 +1455,9 @@ __global__ void __launch_bounds__(block_size) __syncthreads(); 
// Decode data streams { - uint32_t numvals = s->top.data.max_vals, secondary_val; - uint32_t vals_skipped = 0; + uint32_t numvals = s->top.data.max_vals; + uint64_t secondary_val = 0; + uint32_t vals_skipped = 0; if (s->is_string || s->chunk.type_kind == TIMESTAMP) { // For these data types, we have a secondary unsigned 32-bit data stream orc_bytestream_s *bs = (is_dictionary(s->chunk.encoding_kind)) ? &s->bs : &s->bs2; @@ -1471,9 +1472,15 @@ __global__ void __launch_bounds__(block_size) } if (numvals > ofs) { if (is_rlev1(s->chunk.encoding_kind)) { - numvals = ofs + Integer_RLEv1(bs, &s->u.rlev1, &s->vals.u32[ofs], numvals - ofs, t); + if (s->chunk.type_kind == TIMESTAMP) + numvals = ofs + Integer_RLEv1(bs, &s->u.rlev1, &s->vals.u64[ofs], numvals - ofs, t); + else + numvals = ofs + Integer_RLEv1(bs, &s->u.rlev1, &s->vals.u32[ofs], numvals - ofs, t); } else { - numvals = ofs + Integer_RLEv2(bs, &s->u.rlev2, &s->vals.u32[ofs], numvals - ofs, t); + if (s->chunk.type_kind == TIMESTAMP) + numvals = ofs + Integer_RLEv2(bs, &s->u.rlev2, &s->vals.u64[ofs], numvals - ofs, t); + else + numvals = ofs + Integer_RLEv2(bs, &s->u.rlev2, &s->vals.u32[ofs], numvals - ofs, t); } __syncthreads(); if (numvals <= ofs && t >= ofs && t < s->top.data.max_vals) { s->vals.u32[t] = 0; } @@ -1487,15 +1494,24 @@ __global__ void __launch_bounds__(block_size) __syncthreads(); if (t == 0) { s->top.data.index.run_pos[cid] = 0; } numvals -= vals_skipped; - if (t < numvals) { secondary_val = s->vals.u32[vals_skipped + t]; } + if (t < numvals) { + secondary_val = (s->chunk.type_kind == TIMESTAMP) ? s->vals.u64[vals_skipped + t] + : s->vals.u32[vals_skipped + t]; + } __syncthreads(); - if (t < numvals) { s->vals.u32[t] = secondary_val; } + if (t < numvals) { + if (s->chunk.type_kind == TIMESTAMP) + s->vals.u64[t] = secondary_val; + else + s->vals.u32[t] = secondary_val; + } } } __syncthreads(); // For strings with direct encoding, we need to convert the lengths into an offset if (!is_dictionary(s->chunk.encoding_kind)) { - secondary_val = (t < numvals) ? s->vals.u32[t] : 0; + if (t < numvals) + secondary_val = (s->chunk.type_kind == TIMESTAMP) ? 
s->vals.u64[t] : s->vals.u32[t]; if (s->chunk.type_kind != TIMESTAMP) { lengths_to_positions(s->vals.u32, numvals, t); __syncthreads(); @@ -1693,7 +1709,7 @@ __global__ void __launch_bounds__(block_size) } case TIMESTAMP: { int64_t seconds = s->vals.i64[t + vals_skipped] + s->top.data.utc_epoch; - uint32_t nanos = secondary_val; + uint64_t nanos = secondary_val; nanos = (nanos >> 3) * kTimestampNanoScale[nanos & 7]; if (!tz_table.ttimes.empty()) { seconds += get_gmt_offset(tz_table.ttimes, tz_table.offsets, seconds); @@ -1716,7 +1732,7 @@ __global__ void __launch_bounds__(block_size) if (s->chunk.type_kind == TIMESTAMP) { int buffer_pos = s->top.data.max_vals; if (t >= buffer_pos && t < buffer_pos + s->top.data.buffered_count) { - s->vals.u32[t - buffer_pos] = secondary_val; + s->vals.u64[t - buffer_pos] = secondary_val; } } else if (s->chunk.type_kind == BOOLEAN && t < s->top.data.buffered_count) { s->vals.u8[t] = secondary_val; diff --git a/cpp/src/io/orc/stripe_enc.cu b/cpp/src/io/orc/stripe_enc.cu index 88cad005817..aef32efaf6e 100644 --- a/cpp/src/io/orc/stripe_enc.cu +++ b/cpp/src/io/orc/stripe_enc.cu @@ -92,6 +92,7 @@ struct orcenc_state_s { union { uint8_t u8[2048]; uint32_t u32[1024]; + uint64_t u64[1024]; } lengths; }; @@ -101,6 +102,7 @@ static inline __device__ uint32_t zigzag(int32_t v) int32_t s = (v >> 31); return ((v ^ s) * 2) - s; } +static inline __device__ uint64_t zigzag(uint64_t v) { return v; } static inline __device__ uint64_t zigzag(int64_t v) { int64_t s = (v < 0) ? 1 : 0; @@ -286,24 +288,6 @@ static inline __device__ uint32_t StoreVarint(uint8_t *dst, uint64_t v) return bytecnt; } -static inline __device__ void intrle_minmax(int64_t &vmin, int64_t &vmax) -{ - vmin = INT64_MIN; - vmax = INT64_MAX; -} -// static inline __device__ void intrle_minmax(uint64_t &vmin, uint64_t &vmax) { vmin = UINT64_C(0); -// vmax = UINT64_MAX; } -static inline __device__ void intrle_minmax(int32_t &vmin, int32_t &vmax) -{ - vmin = INT32_MIN; - vmax = INT32_MAX; -} -static inline __device__ void intrle_minmax(uint32_t &vmin, uint32_t &vmax) -{ - vmin = UINT32_C(0); - vmax = UINT32_MAX; -} - template static inline __device__ void StoreBytesBigEndian(uint8_t *dst, T v, uint32_t w) { @@ -412,13 +396,9 @@ static __device__ uint32_t IntegerRLE(orcenc_state_s *s, // Find minimum and maximum values if (literal_run > 0) { // Find min & max - T vmin, vmax; + T vmin = (t < literal_run) ? v0 : std::numeric_limits::max(); + T vmax = (t < literal_run) ? 
v0 : std::numeric_limits::min(); uint32_t literal_mode, literal_w; - if (t < literal_run) { - vmin = vmax = v0; - } else { - intrle_minmax(vmax, vmin); - } vmin = block_reduce(temp_storage).Reduce(vmin, cub::Min()); __syncthreads(); vmax = block_reduce(temp_storage).Reduce(vmax, cub::Max()); @@ -652,6 +632,7 @@ __global__ void __launch_bounds__(block_size) typename cub::BlockReduce::TempStorage i32; typename cub::BlockReduce::TempStorage i64; typename cub::BlockReduce::TempStorage u32; + typename cub::BlockReduce::TempStorage u64; } temp_storage; orcenc_state_s *const s = &state_g; @@ -763,7 +744,7 @@ __global__ void __launch_bounds__(block_size) int64_t ts = static_cast(base)[row]; int32_t ts_scale = kTimeScale[min(s->chunk.scale, 9)]; int64_t seconds = ts / ts_scale; - int32_t nanos = (ts - seconds * ts_scale); + int64_t nanos = (ts - seconds * ts_scale); // There is a bug in the ORC spec such that for negative timestamps, it is understood // between the writer and reader that nanos will be adjusted to their positive component // but the negative seconds will be left alone. This means that -2.6 is encoded as @@ -786,7 +767,7 @@ __global__ void __launch_bounds__(block_size) } nanos = (nanos << 3) + zeroes; } - s->lengths.u32[nz_idx] = nanos; + s->lengths.u64[nz_idx] = nanos; break; } case STRING: @@ -897,6 +878,9 @@ __global__ void __launch_bounds__(block_size) uint32_t flush = (s->cur_row == s->chunk.num_rows) ? 1 : 0, n; switch (s->chunk.type_kind) { case TIMESTAMP: + n = IntegerRLE( + s, s->lengths.u64, s->nnz - s->numlengths, s->numlengths, flush, t, temp_storage.u64); + break; case STRING: n = IntegerRLE( s, s->lengths.u32, s->nnz - s->numlengths, s->numlengths, flush, t, temp_storage.u32); diff --git a/cpp/src/io/orc/writer_impl.cu b/cpp/src/io/orc/writer_impl.cu index 1c99c99369b..eb5e90bbeec 100644 --- a/cpp/src/io/orc/writer_impl.cu +++ b/cpp/src/io/orc/writer_impl.cu @@ -476,7 +476,6 @@ orc_streams writer::impl::create_streams(host_span columns, break; case TypeKind::STRING: { bool enable_dict = enable_dictionary_; - size_t direct_data_size = 0; size_t dict_data_size = 0; size_t dict_strings = 0; size_t dict_lengths_div512 = 0; @@ -488,11 +487,15 @@ orc_streams writer::impl::create_streams(host_span columns, dict_lengths_div512 += (sd->num_strings + 0x1ff) >> 9; dict_data_size += sd->dict_char_count; } - direct_data_size += std::accumulate( - stripe.cbegin(), stripe.cend(), direct_data_size, [&](auto data_size, auto rg_idx) { - return data_size + column.host_dict_chunk(rg_idx)->string_char_count; - }); } + + auto const direct_data_size = + std::accumulate(stripe_bounds.front().cbegin(), + stripe_bounds.back().cend(), + size_t{0}, + [&](auto data_size, auto rg_idx) { + return data_size + column.host_dict_chunk(rg_idx)->string_char_count; + }); if (enable_dict) { uint32_t dict_bits = 0; for (dict_bits = 1; dict_bits < 32; dict_bits <<= 1) { diff --git a/cpp/src/io/parquet/parquet_gpu.hpp b/cpp/src/io/parquet/parquet_gpu.hpp index 43d144ec980..f920aee1c29 100644 --- a/cpp/src/io/parquet/parquet_gpu.hpp +++ b/cpp/src/io/parquet/parquet_gpu.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020, NVIDIA CORPORATION. + * Copyright (c) 2018-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
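// [Editor's note: illustrative sketch, not part of this patch.] Dropping the
// intrle_minmax() helpers works because threads that hold no value can seed the
// block reduction with the identity element from std::numeric_limits. A
// host-side analogue of the same idea, for integer types:
#include <algorithm>
#include <limits>
#include <utility>
#include <vector>

template <typename T>
std::pair<T, T> min_max_with_identity(std::vector<T> const& vals)
{
  T vmin = std::numeric_limits<T>::max();  // identity for the min-reduction
  T vmax = std::numeric_limits<T>::min();  // identity for the max-reduction (integers only)
  for (auto const v : vals) {
    vmin = std::min(vmin, v);
    vmax = std::max(vmax, v);
  }
  return {vmin, vmax};
}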
@@ -184,7 +184,7 @@ struct ColumnChunkDesc { { } - uint8_t *compressed_data; // pointer to compressed column chunk data + uint8_t const *compressed_data; // pointer to compressed column chunk data size_t compressed_size; // total compressed data size for this chunk size_t num_values; // total number of values in this column size_t start_row; // starting row of this chunk diff --git a/cpp/src/io/parquet/reader_impl.cu b/cpp/src/io/parquet/reader_impl.cu index a7a02cc6108..16cf0877c23 100644 --- a/cpp/src/io/parquet/reader_impl.cu +++ b/cpp/src/io/parquet/reader_impl.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -822,7 +822,7 @@ void generate_depth_remappings(std::map, std::ve * @copydoc cudf::io::detail::parquet::read_column_chunks */ void reader::impl::read_column_chunks( - std::vector &page_data, + std::vector> &page_data, hostdevice_vector &chunks, // TODO const? size_t begin_chunk, size_t end_chunk, @@ -850,9 +850,15 @@ void reader::impl::read_column_chunks( next_chunk++; } if (io_size != 0) { - auto buffer = _sources[chunk_source_map[chunk]]->host_read(io_offset, io_size); - page_data[chunk] = rmm::device_buffer(buffer->data(), buffer->size(), stream); - uint8_t *d_compdata = static_cast(page_data[chunk].data()); + auto &source = _sources[chunk_source_map[chunk]]; + if (source->is_device_read_preferred(io_size)) { + page_data[chunk] = source->device_read(io_offset, io_size, stream); + } else { + auto const buffer = source->host_read(io_offset, io_size); + page_data[chunk] = + datasource::buffer::create(rmm::device_buffer(buffer->data(), buffer->size(), stream)); + } + auto d_compdata = page_data[chunk]->data(); do { chunks[chunk].compressed_data = d_compdata; d_compdata += chunks[chunk].compressed_size; @@ -1414,7 +1420,7 @@ table_with_metadata reader::impl::read(size_type skip_rows, std::vector chunk_source_map(num_chunks); // Tracker for eventually deallocating compressed and uncompressed data - std::vector page_data(num_chunks); + std::vector> page_data(num_chunks); // Keep track of column chunk file offsets std::vector column_chunk_offsets(num_chunks); @@ -1516,10 +1522,7 @@ table_with_metadata reader::impl::read(size_type skip_rows, decomp_page_data = decompress_page_data(chunks, pages, stream); // Free compressed data for (size_t c = 0; c < chunks.size(); c++) { - if (chunks[c].codec != parquet::Compression::UNCOMPRESSED && page_data[c].size() != 0) { - page_data[c].resize(0); - page_data[c].shrink_to_fit(); - } + if (chunks[c].codec != parquet::Compression::UNCOMPRESSED) { page_data[c].reset(); } } } diff --git a/cpp/src/io/parquet/reader_impl.hpp b/cpp/src/io/parquet/reader_impl.hpp index 137fca03bfd..ca200936134 100644 --- a/cpp/src/io/parquet/reader_impl.hpp +++ b/cpp/src/io/parquet/reader_impl.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -91,7 +91,7 @@ class reader::impl { * @param stream CUDA stream used for device memory operations and kernel launches. 
* */ - void read_column_chunks(std::vector &page_data, + void read_column_chunks(std::vector> &page_data, hostdevice_vector &chunks, size_t begin_chunk, size_t end_chunk, diff --git a/cpp/src/io/parquet/writer_impl.cu b/cpp/src/io/parquet/writer_impl.cu index a645ca0fd91..dd68bc50043 100644 --- a/cpp/src/io/parquet/writer_impl.cu +++ b/cpp/src/io/parquet/writer_impl.cu @@ -1108,19 +1108,7 @@ void writer::impl::write(table_view const &table) num_stats_bfr); } - auto host_bfr = [&]() { - // if the writer supports device_write(), we don't need this scratch space - if (out_sink_->supports_device_write()) { - return pinned_buffer{nullptr, cudaFreeHost}; - } else { - return pinned_buffer{[](size_t size) { - uint8_t *ptr = nullptr; - CUDA_TRY(cudaMallocHost(&ptr, size)); - return ptr; - }(max_chunk_bfr_size), - cudaFreeHost}; - } - }(); + pinned_buffer host_bfr{nullptr, cudaFreeHost}; // Encode row groups in batches for (uint32_t b = 0, r = 0, global_r = global_rowgroup_base; b < (uint32_t)batch_list.size(); @@ -1155,7 +1143,7 @@ void writer::impl::write(table_view const &table) dev_bfr = ck->uncompressed_bfr; } - if (out_sink_->supports_device_write()) { + if (out_sink_->is_device_write_preferred(ck->compressed_size)) { // let the writer do what it wants to retrieve the data from the gpu. out_sink_->device_write(dev_bfr + ck->ck_stat_size, ck->compressed_size, stream); // we still need to do a (much smaller) memcpy for the statistics. @@ -1170,6 +1158,14 @@ void writer::impl::write(table_view const &table) stream.synchronize(); } } else { + if (!host_bfr) { + host_bfr = pinned_buffer{[](size_t size) { + uint8_t *ptr = nullptr; + CUDA_TRY(cudaMallocHost(&ptr, size)); + return ptr; + }(max_chunk_bfr_size), + cudaFreeHost}; + } // copy the full data CUDA_TRY(cudaMemcpyAsync(host_bfr.get(), dev_bfr, diff --git a/cpp/src/io/utilities/data_sink.cpp b/cpp/src/io/utilities/data_sink.cpp index 48558005303..10af7bcb0bd 100644 --- a/cpp/src/io/utilities/data_sink.cpp +++ b/cpp/src/io/utilities/data_sink.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
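// [Editor's note: illustrative sketch, not part of this patch.] The Parquet
// writer now creates its pinned bounce buffer lazily, so sinks that always
// prefer device writes never pay for cudaMallocHost. A simplified analogue of
// that pattern (ensure_pinned and pinned_ptr are hypothetical names):
#include <cuda_runtime.h>
#include <cstddef>
#include <cstdint>
#include <memory>

using pinned_ptr = std::unique_ptr<uint8_t, decltype(&cudaFreeHost)>;

void ensure_pinned(pinned_ptr& buf, std::size_t size)
{
  if (!buf) {  // allocate only when the host fallback path is actually taken
    uint8_t* ptr = nullptr;
    if (cudaMallocHost(&ptr, size) != cudaSuccess) { ptr = nullptr; }
    buf = pinned_ptr{ptr, cudaFreeHost};
  }
}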
@@ -18,6 +18,7 @@ #include #include +#include #include @@ -29,24 +30,44 @@ namespace io { class file_sink : public data_sink { public: explicit file_sink(std::string const& filepath) + : _cufile_out(detail::make_cufile_output(filepath)) { - outfile_.open(filepath, std::ios::out | std::ios::binary | std::ios::trunc); - CUDF_EXPECTS(outfile_.is_open(), "Cannot open output file"); + _output_stream.open(filepath, std::ios::out | std::ios::binary | std::ios::trunc); + CUDF_EXPECTS(_output_stream.is_open(), "Cannot open output file"); } virtual ~file_sink() { flush(); } void host_write(void const* data, size_t size) override { - outfile_.write(static_cast(data), size); + _output_stream.seekp(_bytes_written); + _output_stream.write(static_cast(data), size); + _bytes_written += size; } - void flush() override { outfile_.flush(); } + void flush() override { _output_stream.flush(); } - size_t bytes_written() override { return outfile_.tellp(); } + size_t bytes_written() override { return _bytes_written; } + + bool supports_device_write() const override { return _cufile_out != nullptr; } + + bool is_device_write_preferred(size_t size) const override + { + return _cufile_out != nullptr && _cufile_out->is_cufile_io_preferred(size); + } + + void device_write(void const* gpu_data, size_t size, rmm::cuda_stream_view stream) override + { + if (!supports_device_write()) CUDF_FAIL("Device writes are not supported for this file."); + + _cufile_out->write(gpu_data, _bytes_written, size); + _bytes_written += size; + } private: - std::ofstream outfile_; + std::ofstream _output_stream; + size_t _bytes_written = 0; + std::unique_ptr _cufile_out; }; /** @@ -77,25 +98,25 @@ class host_buffer_sink : public data_sink { */ class void_sink : public data_sink { public: - explicit void_sink() : bytes_written_(0) {} + explicit void_sink() : _bytes_written(0) {} virtual ~void_sink() {} - void host_write(void const* data, size_t size) override { bytes_written_ += size; } + void host_write(void const* data, size_t size) override { _bytes_written += size; } bool supports_device_write() const override { return true; } void device_write(void const* gpu_data, size_t size, rmm::cuda_stream_view stream) override { - bytes_written_ += size; + _bytes_written += size; } void flush() override {} - size_t bytes_written() override { return bytes_written_; } + size_t bytes_written() override { return _bytes_written; } private: - size_t bytes_written_; + size_t _bytes_written; }; class user_sink_wrapper : public data_sink { diff --git a/cpp/src/io/utilities/datasource.cpp b/cpp/src/io/utilities/datasource.cpp index 74163d023be..3f2884d5b7d 100644 --- a/cpp/src/io/utilities/datasource.cpp +++ b/cpp/src/io/utilities/datasource.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,14 +14,14 @@ * limitations under the License. */ +#include + #include #include -#include -#include #include -#include #include +#include namespace cudf { namespace io { @@ -34,12 +34,6 @@ namespace io { * mapping a subset of the file where the starting offset may not be zero. 
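// [Editor's note: illustrative sketch, not part of this patch.] Callers can use
// the new data_sink::is_device_write_preferred() query to pick between a direct
// cuFile write and staging through host memory; write_device_buffer below is a
// hypothetical helper with error handling omitted:
#include <cudf/io/data_sink.hpp>
#include <rmm/cuda_stream_view.hpp>
#include <cuda_runtime.h>
#include <cstddef>
#include <vector>

void write_device_buffer(cudf::io::data_sink* sink,
                         void const* d_data,
                         std::size_t size,
                         rmm::cuda_stream_view stream)
{
  if (sink->is_device_write_preferred(size)) {
    sink->device_write(d_data, size, stream);  // direct GPU-to-file path
  } else {
    std::vector<char> h_data(size);  // stage through host memory
    cudaMemcpyAsync(h_data.data(), d_data, size, cudaMemcpyDeviceToHost, stream.value());
    stream.synchronize();
    sink->host_write(h_data.data(), size);
  }
}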
*/ class memory_mapped_source : public datasource { - struct file_wrapper { - const int fd = -1; - explicit file_wrapper(const char *filepath) : fd(open(filepath, O_RDONLY)) {} - ~file_wrapper() { close(fd); } - }; - class memory_mapped_buffer : public buffer { size_t _size = 0; uint8_t *_data = nullptr; @@ -52,77 +46,99 @@ class memory_mapped_source : public datasource { public: explicit memory_mapped_source(const char *filepath, size_t offset, size_t size) + : _cufile_in(detail::make_cufile_input(filepath)) { - auto const file = file_wrapper(filepath); - CUDF_EXPECTS(file.fd != -1, "Cannot open file"); - - struct stat st; - CUDF_EXPECTS(fstat(file.fd, &st) != -1, "Cannot query file size"); - file_size_ = static_cast(st.st_size); - - if (file_size_ != 0) { map(file.fd, offset, size); } + auto const file = detail::file_wrapper(filepath, O_RDONLY); + _file_size = file.size(); + if (_file_size != 0) { map(file.desc(), offset, size); } } virtual ~memory_mapped_source() { - if (map_addr_ != nullptr) { munmap(map_addr_, map_size_); } + if (_map_addr != nullptr) { munmap(_map_addr, _map_size); } } std::unique_ptr host_read(size_t offset, size_t size) override { - CUDF_EXPECTS(offset >= map_offset_, "Requested offset is outside mapping"); + CUDF_EXPECTS(offset >= _map_offset, "Requested offset is outside mapping"); // Clamp length to available data in the mapped region - auto const read_size = std::min(size, map_size_ - (offset - map_offset_)); + auto const read_size = std::min(size, _map_size - (offset - _map_offset)); return std::make_unique( - static_cast(map_addr_) + (offset - map_offset_), read_size); + static_cast(_map_addr) + (offset - _map_offset), read_size); } size_t host_read(size_t offset, size_t size, uint8_t *dst) override { - CUDF_EXPECTS(offset >= map_offset_, "Requested offset is outside mapping"); + CUDF_EXPECTS(offset >= _map_offset, "Requested offset is outside mapping"); // Clamp length to available data in the mapped region - auto const read_size = std::min(size, map_size_ - (offset - map_offset_)); + auto const read_size = std::min(size, _map_size - (offset - _map_offset)); - auto const src = static_cast(map_addr_) + (offset - map_offset_); + auto const src = static_cast(_map_addr) + (offset - _map_offset); std::memcpy(dst, src, read_size); return read_size; } - size_t size() const override { return file_size_; } + bool supports_device_read() const override { return _cufile_in != nullptr; } + + bool is_device_read_preferred(size_t size) const + { + return _cufile_in != nullptr && _cufile_in->is_cufile_io_preferred(size); + } + + std::unique_ptr device_read(size_t offset, + size_t size, + rmm::cuda_stream_view stream) override + { + if (!supports_device_read()) CUDF_FAIL("Device reads are not supported for this file."); + + auto const read_size = std::min(size, _map_size - (offset - _map_offset)); + return _cufile_in->read(offset, read_size, stream); + } + + size_t device_read(size_t offset, + size_t size, + uint8_t *dst, + rmm::cuda_stream_view stream) override + { + if (!supports_device_read()) CUDF_FAIL("Device reads are not supported for this file."); + auto const read_size = std::min(size, _map_size - (offset - _map_offset)); + return _cufile_in->read(offset, read_size, dst, stream); + } + + size_t size() const override { return _file_size; } private: void map(int fd, size_t offset, size_t size) { - CUDF_EXPECTS(offset < file_size_, "Offset is past end of file"); + CUDF_EXPECTS(offset < _file_size, "Offset is past end of file"); // Offset for `mmap()` must be page 
aligned - auto const map_offset = offset & ~(sysconf(_SC_PAGESIZE) - 1); + _map_offset = offset & ~(sysconf(_SC_PAGESIZE) - 1); // Clamp length to available data in the file if (size == 0) { - size = file_size_ - offset; + size = _file_size - offset; } else { - if ((offset + size) > file_size_) { size = file_size_ - offset; } + if ((offset + size) > _file_size) { size = _file_size - offset; } } // Size for `mmap()` needs to include the page padding - const auto map_size = size + (offset - map_offset); + _map_size = size + (offset - _map_offset); // Check if accessing a region within already mapped area - map_addr_ = mmap(NULL, map_size, PROT_READ, MAP_PRIVATE, fd, map_offset); - CUDF_EXPECTS(map_addr_ != MAP_FAILED, "Cannot create memory mapping"); - map_offset_ = map_offset; - map_size_ = map_size; + _map_addr = mmap(nullptr, _map_size, PROT_READ, MAP_PRIVATE, fd, _map_offset); + CUDF_EXPECTS(_map_addr != MAP_FAILED, "Cannot create memory mapping"); } private: - size_t file_size_ = 0; - void *map_addr_ = nullptr; - size_t map_size_ = 0; - size_t map_offset_ = 0; + size_t _file_size = 0; + void *_map_addr = nullptr; + size_t _map_size = 0; + size_t _map_offset = 0; + std::unique_ptr _cufile_in; }; /** @@ -148,14 +164,19 @@ class user_datasource_wrapper : public datasource { bool supports_device_read() const override { return source->supports_device_read(); } - size_t device_read(size_t offset, size_t size, uint8_t *dst) override + size_t device_read(size_t offset, + size_t size, + uint8_t *dst, + rmm::cuda_stream_view stream) override { - return source->device_read(offset, size, dst); + return source->device_read(offset, size, dst, stream); } - std::unique_ptr device_read(size_t offset, size_t size) override + std::unique_ptr device_read(size_t offset, + size_t size, + rmm::cuda_stream_view stream) override { - return source->device_read(offset, size); + return source->device_read(offset, size, stream); } size_t size() const override { return source->size(); } diff --git a/cpp/src/io/utilities/file_io_utilities.cpp b/cpp/src/io/utilities/file_io_utilities.cpp new file mode 100644 index 00000000000..22ff057cbc1 --- /dev/null +++ b/cpp/src/io/utilities/file_io_utilities.cpp @@ -0,0 +1,264 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include +#include + +#include + +#include + +#include + +namespace cudf { +namespace io { +namespace detail { + +file_wrapper::file_wrapper(std::string const &filepath, int flags) + : fd(open(filepath.c_str(), flags)) +{ + CUDF_EXPECTS(fd != -1, "Cannot open file " + filepath); +} + +file_wrapper::file_wrapper(std::string const &filepath, int flags, mode_t mode) + : fd(open(filepath.c_str(), flags, mode)) +{ + CUDF_EXPECTS(fd != -1, "Cannot open file " + filepath); +} + +file_wrapper::~file_wrapper() { close(fd); } + +long file_wrapper::size() const +{ + if (_size < 0) { + struct stat st; + CUDF_EXPECTS(fstat(fd, &st) != -1, "Cannot query file size"); + _size = static_cast(st.st_size); + } + return _size; +} + +#ifdef CUFILE_FOUND + +/** + * @brief Class that manages cuFile configuration. + */ +class cufile_config { + std::string const default_policy = "OFF"; + std::string const json_path_env_var = "CUFILE_ENV_PATH_JSON"; + + std::string const policy = default_policy; + temp_directory tmp_config_dir{"cudf_cufile_config"}; + + std::string getenv_or(std::string const &env_var_name, std::string const &default_val) + { + auto const env_val = std::getenv(env_var_name.c_str()); + return (env_val == nullptr) ? default_val : std::string(env_val); + } + + cufile_config() : policy{getenv_or("LIBCUDF_CUFILE_POLICY", default_policy)} + { + if (is_enabled()) { + // Modify the config file based on the policy + auto const config_file_path = getenv_or(json_path_env_var, "/etc/cufile.json"); + std::ifstream user_config_file(config_file_path); + // Modified config file is stored in a temporary directory + auto const cudf_config_path = tmp_config_dir.path() + "/cufile.json"; + std::ofstream cudf_config_file(cudf_config_path); + + std::string line; + while (std::getline(user_config_file, line)) { + std::string const tag = "\"allow_compat_mode\""; + if (line.find(tag) != std::string::npos) { + // TODO: only replace the true/false value + // Enable compatiblity mode when cuDF does not fall back to host path + cudf_config_file << tag << ": " << (is_required() ? "true" : "false") << ",\n"; + } else { + cudf_config_file << line << '\n'; + } + + // Point libcufile to the modified config file + CUDF_EXPECTS(setenv(json_path_env_var.c_str(), cudf_config_path.c_str(), 0) == 0, + "Failed to set the cuFile config file environment variable."); + } + } + } + + public: + /** + * @brief Returns true when cuFile use is enabled. + */ + bool is_enabled() const { return policy == "ALWAYS" or policy == "GDS"; } + + /** + * @brief Returns true when cuDF should not fall back to host IO. + */ + bool is_required() const { return policy == "ALWAYS"; } + + static cufile_config const *instance() + { + static cufile_config _instance; + return &_instance; + } +}; + +/** + * @brief Class that dynamically loads the cuFile library and manages the cuFile driver. 
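// [Editor's note: illustrative sketch, not part of this patch.] Per the
// cufile_config code above, cuFile use is controlled by the LIBCUDF_CUFILE_POLICY
// environment variable: "OFF" (default) disables it, "GDS" enables it with a
// fallback to host IO, and "ALWAYS" requires it and turns on cuFile's
// compatibility mode instead of falling back. A test harness could opt in before
// the first IO call, for example:
#include <cstdlib>

void enable_gds()
{
  // setenv is POSIX; cuFile itself is Linux-only, so that is the relevant platform
  setenv("LIBCUDF_CUFILE_POLICY", "GDS", /*overwrite=*/1);
}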
+ */ +class cufile_shim { + private: + cufile_shim(); + + void *cf_lib = nullptr; + decltype(cuFileDriverOpen) *driver_open = nullptr; + decltype(cuFileDriverClose) *driver_close = nullptr; + + std::unique_ptr init_error; + auto is_valid() const noexcept { return init_error == nullptr; } + + public: + cufile_shim(cufile_shim const &) = delete; + cufile_shim &operator=(cufile_shim const &) = delete; + + static cufile_shim const *instance(); + + ~cufile_shim() + { + driver_close(); + dlclose(cf_lib); + } + + decltype(cuFileHandleRegister) *handle_register = nullptr; + decltype(cuFileHandleDeregister) *handle_deregister = nullptr; + decltype(cuFileRead) *read = nullptr; + decltype(cuFileWrite) *write = nullptr; +}; + +cufile_shim::cufile_shim() +{ + try { + cf_lib = dlopen("libcufile.so", RTLD_NOW); + driver_open = reinterpret_cast(dlsym(cf_lib, "cuFileDriverOpen")); + CUDF_EXPECTS(driver_open != nullptr, "could not find cuFile cuFileDriverOpen symbol"); + driver_close = reinterpret_cast(dlsym(cf_lib, "cuFileDriverClose")); + CUDF_EXPECTS(driver_close != nullptr, "could not find cuFile cuFileDriverClose symbol"); + handle_register = + reinterpret_cast(dlsym(cf_lib, "cuFileHandleRegister")); + CUDF_EXPECTS(handle_register != nullptr, "could not find cuFile cuFileHandleRegister symbol"); + handle_deregister = + reinterpret_cast(dlsym(cf_lib, "cuFileHandleDeregister")); + CUDF_EXPECTS(handle_deregister != nullptr, + "could not find cuFile cuFileHandleDeregister symbol"); + read = reinterpret_cast(dlsym(cf_lib, "cuFileRead")); + CUDF_EXPECTS(read != nullptr, "could not find cuFile cuFileRead symbol"); + write = reinterpret_cast(dlsym(cf_lib, "cuFileWrite")); + CUDF_EXPECTS(write != nullptr, "could not find cuFile cuFileWrite symbol"); + + CUDF_EXPECTS(driver_open().err == CU_FILE_SUCCESS, "Failed to initialize cuFile driver"); + } catch (cudf::logic_error const &err) { + init_error = std::make_unique(err); + } +} + +cufile_shim const *cufile_shim::instance() +{ + static cufile_shim _instance; + // Defer throwing to avoid repeated attempts to load the library + if (!_instance.is_valid()) CUDF_FAIL("" + std::string(_instance.init_error->what())); + + return &_instance; +} + +void cufile_registered_file::register_handle() +{ + CUfileDescr_t cufile_desc{}; + cufile_desc.handle.fd = _file.desc(); + cufile_desc.type = CU_FILE_HANDLE_TYPE_OPAQUE_FD; + CUDF_EXPECTS(shim->handle_register(&cf_handle, &cufile_desc).err == CU_FILE_SUCCESS, + "Cannot register file handle with cuFile"); +} + +cufile_registered_file::~cufile_registered_file() { shim->handle_deregister(cf_handle); } + +cufile_input_impl::cufile_input_impl(std::string const &filepath) + : shim{cufile_shim::instance()}, cf_file(shim, filepath, O_RDONLY | O_DIRECT) +{ +} + +std::unique_ptr cufile_input_impl::read(size_t offset, + size_t size, + rmm::cuda_stream_view stream) +{ + rmm::device_buffer out_data(size, stream); + CUDF_EXPECTS(shim->read(cf_file.handle(), out_data.data(), size, offset, 0) != -1, + "cuFile error reading from a file"); + + return datasource::buffer::create(std::move(out_data)); +} + +size_t cufile_input_impl::read(size_t offset, + size_t size, + uint8_t *dst, + rmm::cuda_stream_view stream) +{ + CUDF_EXPECTS(shim->read(cf_file.handle(), dst, size, offset, 0) != -1, + "cuFile error reading from a file"); + // always read the requested size for now + return size; +} + +cufile_output_impl::cufile_output_impl(std::string const &filepath) + : shim{cufile_shim::instance()}, cf_file(shim, filepath, O_CREAT | O_RDWR | O_DIRECT, 
0664) +{ +} + +void cufile_output_impl::write(void const *data, size_t offset, size_t size) +{ + CUDF_EXPECTS(shim->write(cf_file.handle(), data, size, offset, 0) != -1, + "cuFile error writing to a file"); +} +#endif + +std::unique_ptr make_cufile_input(std::string const &filepath) +{ +#ifdef CUFILE_FOUND + if (cufile_config::instance()->is_enabled()) { + try { + return std::make_unique(filepath); + } catch (...) { + if (cufile_config::instance()->is_required()) throw; + } + } +#endif + return nullptr; +} + +std::unique_ptr make_cufile_output(std::string const &filepath) +{ +#ifdef CUFILE_FOUND + if (cufile_config::instance()->is_enabled()) { + try { + return std::make_unique(filepath); + } catch (...) { + if (cufile_config::instance()->is_required()) throw; + } + } +#endif + return nullptr; +} + +} // namespace detail +} // namespace io +} // namespace cudf diff --git a/cpp/src/io/utilities/file_io_utilities.hpp b/cpp/src/io/utilities/file_io_utilities.hpp new file mode 100644 index 00000000000..85399bdd44d --- /dev/null +++ b/cpp/src/io/utilities/file_io_utilities.hpp @@ -0,0 +1,242 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#ifdef CUFILE_FOUND +#include +#endif + +#include + +#include +#include + +#include + +namespace cudf { +namespace io { +namespace detail { + +/** + * @brief Class that provides RAII for file handling. + */ +class file_wrapper { + int const fd = -1; + long mutable _size = -1; + + public: + explicit file_wrapper(std::string const &filepath, int flags); + explicit file_wrapper(std::string const &filepath, int flags, mode_t mode); + ~file_wrapper(); + long size() const; + auto desc() const { return fd; } +}; + +/** + * @brief Base class for cuFile input/output. + * + * Contains the common API for cuFile input and output classes. + */ +class cufile_io_base { + public: + /** + * @brief Returns an estimate of whether the cuFile operation is the optimal option. + * + * @param size Read/write operation size, in bytes. + * @return Whether a cuFile operation with the given size is expected to be faster than a host + * read + H2D copy + */ + static bool is_cufile_io_preferred(size_t size) { return size > op_size_threshold; } + + protected: + /** + * @brief The read/write size above which cuFile is faster then host read + copy + * + * This may not be the optimal threshold for all systems. Derived `is_cufile_io_preferred` + * implementations can use a different logic. + */ + static constexpr size_t op_size_threshold = 128 << 10; +}; + +/** + * @brief Interface class for cufile input. + */ +class cufile_input : public cufile_io_base { + public: + /** + * @brief Reads into a new device buffer. 
+ * + * @throws cudf::logic_error on cuFile error + * + * @param offset Number of bytes from the start + * @param size Number of bytes to read + * @param stream CUDA stream to use + * + * @return The data buffer in the device memory + */ + virtual std::unique_ptr read(size_t offset, + size_t size, + rmm::cuda_stream_view stream) = 0; + + /** + * @brief Reads into existing device memory. + * + * @throws cudf::logic_error on cuFile error + * + * @param offset Number of bytes from the start + * @param size Number of bytes to read + * @param dst Address of the existing device memory + * @param stream CUDA stream to use + * + * @return The number of bytes read + */ + virtual size_t read(size_t offset, size_t size, uint8_t *dst, rmm::cuda_stream_view stream) = 0; +}; + +/** + * @brief Interface class for cufile output. + */ +class cufile_output : public cufile_io_base { + public: + /** + * @brief Writes the data from a device buffer into a file. + * + * @throws cudf::logic_error on cuFile error + * + * @param data Pointer to the buffer to be written into the output file + * @param offset Number of bytes from the start + * @param size Number of bytes to write + */ + virtual void write(void const *data, size_t offset, size_t size) = 0; +}; + +#ifdef CUFILE_FOUND + +class cufile_shim; + +/** + * @brief Class that provides RAII for cuFile file registration. + */ +struct cufile_registered_file { + void register_handle(); + + public: + cufile_registered_file(cufile_shim const *shim, std::string const &filepath, int flags) + : _file(filepath, flags), shim{shim} + { + register_handle(); + } + + cufile_registered_file(cufile_shim const *shim, + std::string const &filepath, + int flags, + mode_t mode) + : _file(filepath, flags, mode), shim{shim} + { + register_handle(); + } + + auto const &handle() const noexcept { return cf_handle; } + + ~cufile_registered_file(); + + private: + file_wrapper const _file; + CUfileHandle_t cf_handle = nullptr; + cufile_shim const *shim = nullptr; +}; + +/** + * @brief Adapter for the `cuFileRead` API. + * + * Exposes APIs to read directly from a file into device memory. + */ +class cufile_input_impl final : public cufile_input { + public: + cufile_input_impl(std::string const &filepath); + + std::unique_ptr read(size_t offset, + size_t size, + rmm::cuda_stream_view stream) override; + + size_t read(size_t offset, size_t size, uint8_t *dst, rmm::cuda_stream_view stream) override; + + private: + cufile_shim const *shim = nullptr; + cufile_registered_file const cf_file; +}; + +/** + * @brief Adapter for the `cuFileWrite` API. + * + * Exposes an API to write directly into a file from device memory. 
+ */ +class cufile_output_impl final : public cufile_output { + public: + cufile_output_impl(std::string const &filepath); + + void write(void const *data, size_t offset, size_t size) override; + + private: + cufile_shim const *shim = nullptr; + cufile_registered_file const cf_file; +}; +#else + +class cufile_input_impl final : public cufile_input { + public: + std::unique_ptr read(size_t offset, + size_t size, + rmm::cuda_stream_view stream) override + { + CUDF_FAIL("Only used to compile without cufile library, should not be called"); + } + + size_t read(size_t offset, size_t size, uint8_t *dst, rmm::cuda_stream_view stream) override + { + CUDF_FAIL("Only used to compile without cufile library, should not be called"); + } +}; + +class cufile_output_impl final : public cufile_output { + public: + void write(void const *data, size_t offset, size_t size) override + { + CUDF_FAIL("Only used to compile without cufile library, should not be called"); + } +}; +#endif + +/** + * @brief Creates a `cufile_input_impl` object + * + * Returns a null pointer if an exception occurs in the `cufile_input_impl` constructor, or if the + * cuFile library is not installed. + */ +std::unique_ptr make_cufile_input(std::string const &filepath); + +/** + * @brief Creates a `cufile_output_impl` object + * + * Returns a null pointer if an exception occurs in the `cufile_output_impl` constructor, or if the + * cuFile library is not installed. + */ +std::unique_ptr make_cufile_output(std::string const &filepath); + +} // namespace detail +} // namespace io +} // namespace cudf diff --git a/cpp/src/lists/explode.cu b/cpp/src/lists/explode.cu new file mode 100644 index 00000000000..336aabde15e --- /dev/null +++ b/cpp/src/lists/explode.cu @@ -0,0 +1,314 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include + +#include +#include + +namespace cudf { +namespace detail { +namespace { + +std::unique_ptr
build_table( + table_view const& input_table, + size_type const explode_column_idx, + column_view const& sliced_child, + cudf::device_span gather_map, + thrust::optional> explode_col_gather_map, + thrust::optional> position_array, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + auto select_iter = thrust::make_transform_iterator( + thrust::make_counting_iterator(0), + [explode_column_idx](size_type i) { return i >= explode_column_idx ? i + 1 : i; }); + + auto gathered_table = + detail::gather(input_table.select(select_iter, select_iter + input_table.num_columns() - 1), + gather_map.begin(), + gather_map.end(), + cudf::out_of_bounds_policy::DONT_CHECK, + stream, + mr); + + std::vector> columns = gathered_table.release()->release(); + + columns.insert(columns.begin() + explode_column_idx, + explode_col_gather_map + ? std::move(detail::gather(table_view({sliced_child}), + explode_col_gather_map->begin(), + explode_col_gather_map->end(), + cudf::out_of_bounds_policy::NULLIFY, + stream, + mr) + ->release()[0]) + : std::make_unique(sliced_child, stream, mr)); + + if (position_array) { + size_type position_size = position_array->size(); + columns.insert(columns.begin() + explode_column_idx, + std::make_unique( + data_type(type_to_id()), position_size, position_array->release())); + } + + return std::make_unique
(std::move(columns));
+}
+}  // namespace
+
+std::unique_ptr<table>
explode(table_view const& input_table, + size_type const explode_column_idx, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + lists_column_view explode_col{input_table.column(explode_column_idx)}; + auto sliced_child = explode_col.get_sliced_child(stream); + rmm::device_uvector gather_map(sliced_child.size(), stream); + + // Sliced columns may require rebasing of the offsets. + auto offsets = explode_col.offsets_begin(); + // offsets + 1 here to skip the 0th offset, which removes a - 1 operation later. + auto offsets_minus_one = thrust::make_transform_iterator( + thrust::next(offsets), [offsets] __device__(auto i) { return (i - offsets[0]) - 1; }); + auto counting_iter = thrust::make_counting_iterator(0); + + // This looks like an off-by-one bug, but what is going on here is that we need to reduce each + // result from `lower_bound` by 1 to build the correct gather map. This can be accomplished by + // skipping the first entry and using the result of `lower_bound` directly. + thrust::lower_bound(rmm::exec_policy(stream), + offsets_minus_one, + offsets_minus_one + explode_col.size(), + counting_iter, + counting_iter + gather_map.size(), + gather_map.begin()); + + return build_table(input_table, + explode_column_idx, + sliced_child, + gather_map, + thrust::nullopt, + thrust::nullopt, + stream, + mr); +} + +std::unique_ptr
explode_position(table_view const& input_table, + size_type const explode_column_idx, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + lists_column_view explode_col{input_table.column(explode_column_idx)}; + auto sliced_child = explode_col.get_sliced_child(stream); + rmm::device_uvector gather_map(sliced_child.size(), stream); + + // Sliced columns may require rebasing of the offsets. + auto offsets = explode_col.offsets_begin(); + // offsets + 1 here to skip the 0th offset, which removes a - 1 operation later. + auto offsets_minus_one = thrust::make_transform_iterator( + offsets + 1, [offsets] __device__(auto i) { return (i - offsets[0]) - 1; }); + auto counting_iter = thrust::make_counting_iterator(0); + + rmm::device_uvector pos(sliced_child.size(), stream, mr); + + // This looks like an off-by-one bug, but what is going on here is that we need to reduce each + // result from `lower_bound` by 1 to build the correct gather map. This can be accomplished by + // skipping the first entry and using the result of `lower_bound` directly. + thrust::transform( + rmm::exec_policy(stream), + counting_iter, + counting_iter + gather_map.size(), + gather_map.begin(), + [position_array = pos.data(), + offsets_minus_one, + offsets, + offset_size = explode_col.size()] __device__(auto idx) -> size_type { + auto lb_idx = thrust::distance( + offsets_minus_one, + thrust::lower_bound(thrust::seq, offsets_minus_one, offsets_minus_one + offset_size, idx)); + position_array[idx] = idx - (offsets[lb_idx] - offsets[0]); + return lb_idx; + }); + + return build_table(input_table, + explode_column_idx, + sliced_child, + gather_map, + thrust::nullopt, + std::move(pos), + stream, + mr); +} + +std::unique_ptr
explode_outer(table_view const& input_table, + size_type const explode_column_idx, + bool include_position, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + lists_column_view explode_col{input_table.column(explode_column_idx)}; + auto sliced_child = explode_col.get_sliced_child(stream); + auto counting_iter = thrust::make_counting_iterator(0); + auto offsets = explode_col.offsets_begin(); + + // number of nulls or empty lists found so far in the explode column + rmm::device_uvector null_or_empty_offset(explode_col.size(), stream); + + auto null_or_empty = thrust::make_transform_iterator( + thrust::make_counting_iterator(0), + [offsets, offsets_size = explode_col.size() - 1] __device__(int idx) { + return (idx > offsets_size || (offsets[idx + 1] != offsets[idx])) ? 0 : 1; + }); + thrust::inclusive_scan(rmm::exec_policy(stream), + null_or_empty, + null_or_empty + sliced_child.size(), + null_or_empty_offset.begin()); + + auto null_or_empty_count = + null_or_empty_offset.size() > 0 ? null_or_empty_offset.back_element(stream) : 0; + if (null_or_empty_count == 0) { + // performance penalty to run the below loop if there are no nulls or empty lists. + // run simple explode instead + return include_position ? explode_position(input_table, explode_column_idx, stream, mr) + : explode(input_table, explode_column_idx, stream, mr); + } + + auto gather_map_size = sliced_child.size() + null_or_empty_count; + + rmm::device_uvector gather_map(gather_map_size, stream); + rmm::device_uvector explode_col_gather_map(gather_map_size, stream); + rmm::device_uvector pos(include_position ? gather_map_size : 0, stream, mr); + + // offsets + 1 here to skip the 0th offset, which removes a - 1 operation later. + auto offsets_minus_one = thrust::make_transform_iterator( + thrust::next(offsets), [offsets] __device__(auto i) { return (i - offsets[0]) - 1; }); + // Fill in gather map with all the child column's entries + thrust::for_each(rmm::exec_policy(stream), + counting_iter, + counting_iter + sliced_child.size(), + [offsets_minus_one, + gather_map = gather_map.begin(), + explode_col_gather_map = explode_col_gather_map.begin(), + position_array = pos.begin(), + include_position, + offsets, + null_or_empty_offset = null_or_empty_offset.begin(), + null_or_empty, + offset_size = explode_col.offsets().size() - 1] __device__(auto idx) { + auto lb_idx = thrust::distance( + offsets_minus_one, + thrust::lower_bound( + thrust::seq, offsets_minus_one, offsets_minus_one + (offset_size), idx)); + auto index_to_write = null_or_empty_offset[lb_idx] + idx; + gather_map[index_to_write] = lb_idx; + explode_col_gather_map[index_to_write] = idx; + if (include_position) { + position_array[index_to_write] = idx - (offsets[lb_idx] - offsets[0]); + } + if (null_or_empty[idx]) { + auto invalid_index = null_or_empty_offset[idx] == 0 + ? offsets[idx] + : offsets[idx] + null_or_empty_offset[idx] - 1; + gather_map[invalid_index] = idx; + + // negative one to indicate a null value + explode_col_gather_map[invalid_index] = -1; + + if (include_position) { position_array[invalid_index] = 0; } + } + }); + + return build_table( + input_table, + explode_column_idx, + sliced_child, + gather_map, + explode_col_gather_map, + include_position ? std::move(pos) : thrust::optional>{}, + stream, + mr); +} + +} // namespace detail + +/** + * @copydoc cudf::explode(input_table,explode_column_idx,rmm::mr::device_memory_resource) + */ +std::unique_ptr
explode(table_view const& input_table,
+                               size_type explode_column_idx,
+                               rmm::mr::device_memory_resource* mr)
+{
+  CUDF_FUNC_RANGE();
+  CUDF_EXPECTS(input_table.column(explode_column_idx).type().id() == type_id::LIST,
+               "Unsupported non-list column");
+  return detail::explode(input_table, explode_column_idx, rmm::cuda_stream_default, mr);
+}
+
+/**
+ * @copydoc cudf::explode_position(input_table,explode_column_idx,rmm::mr::device_memory_resource)
+ */
+std::unique_ptr<table> explode_position(table_view const& input_table,
+                                        size_type explode_column_idx,
+                                        rmm::mr::device_memory_resource* mr)
+{
+  CUDF_FUNC_RANGE();
+  CUDF_EXPECTS(input_table.column(explode_column_idx).type().id() == type_id::LIST,
+               "Unsupported non-list column");
+  return detail::explode_position(input_table, explode_column_idx, rmm::cuda_stream_default, mr);
+}
+
+/**
+ * @copydoc cudf::explode_outer(input_table,explode_column_idx,rmm::mr::device_memory_resource)
+ */
+std::unique_ptr<table> explode_outer(table_view const& input_table,
+                                     size_type explode_column_idx,
+                                     rmm::mr::device_memory_resource* mr)
+{
+  CUDF_FUNC_RANGE();
+  CUDF_EXPECTS(input_table.column(explode_column_idx).type().id() == type_id::LIST,
+               "Unsupported non-list column");
+  return detail::explode_outer(
+    input_table, explode_column_idx, false, rmm::cuda_stream_default, mr);
+}
+
+/**
+ * @copydoc
+ * cudf::explode_outer_position(input_table,explode_column_idx,rmm::mr::device_memory_resource)
+ */
+std::unique_ptr<table>
explode_outer_position(table_view const& input_table, + size_type explode_column_idx, + rmm::mr::device_memory_resource* mr) +{ + CUDF_FUNC_RANGE(); + CUDF_EXPECTS(input_table.column(explode_column_idx).type().id() == type_id::LIST, + "Unsupported non-list column"); + return detail::explode_outer(input_table, explode_column_idx, true, rmm::cuda_stream_default, mr); +} + +} // namespace cudf diff --git a/cpp/src/reshape/explode.cu b/cpp/src/reshape/explode.cu deleted file mode 100644 index 34d7d8fe31d..00000000000 --- a/cpp/src/reshape/explode.cu +++ /dev/null @@ -1,178 +0,0 @@ -/* - * Copyright (c) 2021, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include -#include -#include - -#include -#include - -namespace cudf { -namespace detail { -namespace { -/** - * @brief Function object for exploding a column. - */ -struct explode_functor { - /** - * @brief Function object for exploding a column. - */ - template - std::unique_ptr
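// [Editor's note: illustrative sketch, not part of this patch.] End-to-end
// behaviour of the relocated explode APIs, using the column wrappers from cudf's
// test utilities: explode() drops empty and null list rows, while
// explode_outer_position() keeps them and adds the intra-list position column.
#include <cudf/lists/explode.hpp>
#include <cudf/table/table_view.hpp>
#include <cudf_test/column_wrapper.hpp>
#include <cstdint>

void explode_example()
{
  using LCW = cudf::test::lists_column_wrapper<int32_t>;
  cudf::test::fixed_width_column_wrapper<int32_t> ids{100, 200, 300};
  LCW lists{LCW{1, 2}, LCW{}, LCW{3}};
  cudf::table_view input{{ids, lists}};

  // Explode column 1: rows become (100, 1), (100, 2), (300, 3).
  auto exploded = cudf::explode(input, 1);

  // Outer explode with position: (100, 0, 1), (100, 1, 2), (200, 0, null), (300, 0, 3).
  auto outer = cudf::explode_outer_position(input, 1);
}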
operator()(table_view const& input_table, - size_type const explode_column_idx, - bool include_pos, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) const - { - CUDF_FAIL("Unsupported non-list column"); - - return std::make_unique
(); - } -}; - -template <> -std::unique_ptr
explode_functor::operator()( - table_view const& input_table, - size_type const explode_column_idx, - bool include_pos, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) const -{ - lists_column_view lc{input_table.column(explode_column_idx)}; - auto sliced_child = lc.get_sliced_child(stream); - rmm::device_uvector gather_map_indices(sliced_child.size(), stream); - - // Sliced columns may require rebasing of the offsets. - auto offsets = lc.offsets_begin(); - // offsets + 1 here to skip the 0th offset, which removes a - 1 operation later. - auto offsets_minus_one = thrust::make_transform_iterator( - offsets + 1, [offsets] __device__(auto i) { return (i - offsets[0]) - 1; }); - auto counting_iter = thrust::make_counting_iterator(0); - - rmm::device_uvector pos(include_pos ? sliced_child.size() : 0, stream, mr); - - // This looks like an off-by-one bug, but what is going on here is that we need to reduce each - // result from `lower_bound` by 1 to build the correct gather map. This can be accomplished by - // skipping the first entry and using the result of `lower_bound` directly. - if (include_pos) { - thrust::transform( - rmm::exec_policy(stream), - counting_iter, - counting_iter + gather_map_indices.size(), - gather_map_indices.begin(), - [position_array = pos.data(), offsets_minus_one, offsets, offset_size = lc.size()] __device__( - auto idx) -> size_type { - auto lb_idx = thrust::lower_bound( - thrust::seq, offsets_minus_one, offsets_minus_one + offset_size, idx) - - offsets_minus_one; - position_array[idx] = idx - (offsets[lb_idx] - offsets[0]); - return lb_idx; - }); - } else { - thrust::lower_bound(rmm::exec_policy(stream), - offsets_minus_one, - offsets_minus_one + lc.size(), - counting_iter, - counting_iter + gather_map_indices.size(), - gather_map_indices.begin()); - } - - auto select_iter = thrust::make_transform_iterator( - thrust::make_counting_iterator(0), - [explode_column_idx](size_type i) { return i >= explode_column_idx ? i + 1 : i; }); - std::vector selected_columns(select_iter, select_iter + input_table.num_columns() - 1); - - auto gathered_table = cudf::detail::gather(input_table.select(selected_columns), - gather_map_indices.begin(), - gather_map_indices.end(), - cudf::out_of_bounds_policy::DONT_CHECK, - stream, - mr); - - std::vector> columns = gathered_table.release()->release(); - - columns.insert(columns.begin() + explode_column_idx, - std::make_unique(sliced_child, stream, mr)); - - if (include_pos) { - columns.insert(columns.begin() + explode_column_idx, - std::make_unique( - data_type(type_to_id()), sliced_child.size(), pos.release())); - } - - return std::make_unique
(std::move(columns)); -} -} // namespace - -/** - * @copydoc - * cudf::explode(input_table,explode_column_idx,rmm::mr::device_memory_resource) - * - * @param stream CUDA stream used for device memory operations and kernel launches. - */ -std::unique_ptr
explode(table_view const& input_table, - size_type explode_column_idx, - bool include_pos, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) -{ - return type_dispatcher(input_table.column(explode_column_idx).type(), - explode_functor{}, - input_table, - explode_column_idx, - include_pos, - stream, - mr); -} - -} // namespace detail - -/** - * @copydoc cudf::explode(input_table,explode_column_idx,rmm::mr::device_memory_resource) - */ -std::unique_ptr
explode(table_view const& input_table, - size_type explode_column_idx, - rmm::mr::device_memory_resource* mr) -{ - CUDF_FUNC_RANGE(); - return detail::explode(input_table, explode_column_idx, false, rmm::cuda_stream_default, mr); -} - -/** - * @copydoc cudf::explode_position(input_table,explode_column_idx,rmm::mr::device_memory_resource) - */ -std::unique_ptr<table>
explode_position(table_view const& input_table, - size_type explode_column_idx, - rmm::mr::device_memory_resource* mr) -{ - CUDF_FUNC_RANGE(); - return detail::explode(input_table, explode_column_idx, true, rmm::cuda_stream_default, mr); -} - -} // namespace cudf diff --git a/cpp/src/rolling/grouped_rolling.cu b/cpp/src/rolling/grouped_rolling.cu index 135df6bdfe2..c1ebc9f3f9f 100644 --- a/cpp/src/rolling/grouped_rolling.cu +++ b/cpp/src/rolling/grouped_rolling.cu @@ -384,7 +384,7 @@ get_null_bounds_for_timestamp_column(column_view const& timestamp_column, if (timestamp_column.has_nulls()) { auto p_timestamps_device_view = column_device_view::create(timestamp_column); - auto num_groups = group_offsets.size(); + auto num_groups = group_offsets.size() - 1; // Null timestamps exist. Find null bounds, per group. thrust::for_each( diff --git a/cpp/src/strings/char_types/char_types.cu b/cpp/src/strings/char_types/char_types.cu index 10496b89328..0b384ad0631 100644 --- a/cpp/src/strings/char_types/char_types.cu +++ b/cpp/src/strings/char_types/char_types.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -186,91 +186,6 @@ std::unique_ptr filter_characters_of_type(strings_column_view const& str mr); } -std::unique_ptr is_integer( - strings_column_view const& strings, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) -{ - auto strings_column = column_device_view::create(strings.parent(), stream); - auto d_column = *strings_column; - // create output column - auto results = make_numeric_column(data_type{type_id::BOOL8}, - strings.size(), - cudf::detail::copy_bitmask(strings.parent(), stream, mr), - strings.null_count(), - stream, - mr); - auto d_results = results->mutable_view().data(); - thrust::transform(rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(strings.size()), - d_results, - [d_column] __device__(size_type idx) { - if (d_column.is_null(idx)) return false; - return string::is_integer(d_column.element(idx)); - }); - results->set_null_count(strings.null_count()); - return results; -} - -bool all_integer(strings_column_view const& strings, rmm::cuda_stream_view stream) -{ - auto strings_column = column_device_view::create(strings.parent(), stream); - auto d_column = *strings_column; - auto transformer_itr = thrust::make_transform_iterator( - thrust::make_counting_iterator(0), [d_column] __device__(size_type idx) { - if (d_column.is_null(idx)) return false; - return string::is_integer(d_column.element(idx)); - }); - return thrust::all_of(rmm::exec_policy(stream), - transformer_itr, - transformer_itr + strings.size(), - thrust::identity()); -} - -std::unique_ptr is_float( - strings_column_view const& strings, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) -{ - auto strings_column = column_device_view::create(strings.parent(), stream); - auto d_column = *strings_column; - // create output column - auto results = make_numeric_column(data_type{type_id::BOOL8}, - strings.size(), - cudf::detail::copy_bitmask(strings.parent(), stream, mr), - strings.null_count(), - stream, - mr); - auto d_results = results->mutable_view().data(); - // check strings for valid float chars - thrust::transform(rmm::exec_policy(stream), - 
thrust::make_counting_iterator(0), - thrust::make_counting_iterator(strings.size()), - d_results, - [d_column] __device__(size_type idx) { - if (d_column.is_null(idx)) return false; - return string::is_float(d_column.element(idx)); - }); - results->set_null_count(strings.null_count()); - return results; -} - -bool all_float(strings_column_view const& strings, rmm::cuda_stream_view stream) -{ - auto strings_column = column_device_view::create(strings.parent(), stream); - auto d_column = *strings_column; - auto transformer_itr = thrust::make_transform_iterator( - thrust::make_counting_iterator(0), [d_column] __device__(size_type idx) { - if (d_column.is_null(idx)) return false; - return string::is_float(d_column.element(idx)); - }); - return thrust::all_of(rmm::exec_policy(stream), - transformer_itr, - transformer_itr + strings.size(), - thrust::identity()); -} - } // namespace detail // external API @@ -295,31 +210,5 @@ std::unique_ptr filter_characters_of_type(strings_column_view const& str strings, types_to_remove, replacement, types_to_keep, rmm::cuda_stream_default, mr); } -std::unique_ptr is_integer(strings_column_view const& strings, - rmm::mr::device_memory_resource* mr) -{ - CUDF_FUNC_RANGE(); - return detail::is_integer(strings, rmm::cuda_stream_default, mr); -} - -std::unique_ptr is_float(strings_column_view const& strings, - rmm::mr::device_memory_resource* mr) -{ - CUDF_FUNC_RANGE(); - return detail::is_float(strings, rmm::cuda_stream_default, mr); -} - -bool all_integer(strings_column_view const& strings) -{ - CUDF_FUNC_RANGE(); - return detail::all_integer(strings, rmm::cuda_stream_default); -} - -bool all_float(strings_column_view const& strings) -{ - CUDF_FUNC_RANGE(); - return detail::all_float(strings, rmm::cuda_stream_default); -} - } // namespace strings } // namespace cudf diff --git a/cpp/src/strings/convert/convert_floats.cu b/cpp/src/strings/convert/convert_floats.cu index 2bf65976986..b6d99efd51f 100644 --- a/cpp/src/strings/convert/convert_floats.cu +++ b/cpp/src/strings/convert/convert_floats.cu @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -536,12 +537,50 @@ std::unique_ptr from_floats(column_view const& floats, } // namespace detail // external API - std::unique_ptr from_floats(column_view const& floats, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); return detail::from_floats(floats, rmm::cuda_stream_default, mr); } +namespace detail { +std::unique_ptr is_float( + strings_column_view const& strings, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) +{ + auto strings_column = column_device_view::create(strings.parent(), stream); + auto d_column = *strings_column; + // create output column + auto results = make_numeric_column(data_type{type_id::BOOL8}, + strings.size(), + cudf::detail::copy_bitmask(strings.parent(), stream, mr), + strings.null_count(), + stream, + mr); + auto d_results = results->mutable_view().data(); + // check strings for valid float chars + thrust::transform(rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(strings.size()), + d_results, + [d_column] __device__(size_type idx) { + if (d_column.is_null(idx)) return false; + return string::is_float(d_column.element(idx)); + }); + results->set_null_count(strings.null_count()); + return results; +} + +} // namespace detail + +// external API +std::unique_ptr is_float(strings_column_view const& strings, + rmm::mr::device_memory_resource* mr) +{ 
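+  // Public API: record an NVTX function range and forward to detail::is_float on the default stream.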
+ CUDF_FUNC_RANGE(); + return detail::is_float(strings, rmm::cuda_stream_default, mr); +} + } // namespace strings } // namespace cudf diff --git a/cpp/src/strings/convert/convert_integers.cu b/cpp/src/strings/convert/convert_integers.cu index 112550fc25b..5c5032b5c87 100644 --- a/cpp/src/strings/convert/convert_integers.cu +++ b/cpp/src/strings/convert/convert_integers.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -245,7 +246,6 @@ std::unique_ptr from_integers(column_view const& integers, } // namespace detail // external API - std::unique_ptr from_integers(column_view const& integers, rmm::mr::device_memory_resource* mr) { @@ -253,5 +253,42 @@ std::unique_ptr from_integers(column_view const& integers, return detail::from_integers(integers, rmm::cuda_stream_default, mr); } +namespace detail { +std::unique_ptr is_integer( + strings_column_view const& strings, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) +{ + auto strings_column = column_device_view::create(strings.parent(), stream); + auto d_column = *strings_column; + // create output column + auto results = make_numeric_column(data_type{type_id::BOOL8}, + strings.size(), + cudf::detail::copy_bitmask(strings.parent(), stream, mr), + strings.null_count(), + stream, + mr); + auto d_results = results->mutable_view().data(); + thrust::transform(rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(strings.size()), + d_results, + [d_column] __device__(size_type idx) { + if (d_column.is_null(idx)) return false; + return string::is_integer(d_column.element(idx)); + }); + results->set_null_count(strings.null_count()); + return results; +} +} // namespace detail + +// external API +std::unique_ptr is_integer(strings_column_view const& strings, + rmm::mr::device_memory_resource* mr) +{ + CUDF_FUNC_RANGE(); + return detail::is_integer(strings, rmm::cuda_stream_default, mr); +} + } // namespace strings } // namespace cudf diff --git a/cpp/src/strings/substring.cu b/cpp/src/strings/substring.cu index 68080c0eb89..f712b0cb6aa 100644 --- a/cpp/src/strings/substring.cu +++ b/cpp/src/strings/substring.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -43,17 +43,25 @@ namespace { * using the provided start, stop, and step parameters. 
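 *
 * The functor follows the two-pass pattern used by make_strings_children: when
 * d_chars is null it computes each row's output size and stores it in
 * d_offsets[idx]; when d_chars is non-null it writes the substring bytes into
 * the output chars buffer.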
*/ struct substring_fn { - const column_device_view d_column; - numeric_scalar_device_view d_start, d_stop, d_step; - const int32_t* d_offsets{}; + column_device_view const d_column; + numeric_scalar_device_view const d_start; + numeric_scalar_device_view const d_stop; + numeric_scalar_device_view const d_step; + int32_t* d_offsets{}; char* d_chars{}; - __device__ cudf::size_type operator()(size_type idx) + __device__ void operator()(size_type idx) { - if (d_column.is_null(idx)) return 0; // null string - string_view d_str = d_column.template element(idx); + if (d_column.is_null(idx)) { + if (!d_chars) d_offsets[idx] = 0; + return; + } + auto const d_str = d_column.template element(idx); auto const length = d_str.length(); - if (length == 0) return 0; // empty string + if (length == 0) { + if (!d_chars) d_offsets[idx] = 0; + return; + } size_type const step = d_step.is_valid() ? d_step.value() : 1; auto const begin = [&] { // always inclusive // when invalid, default depends on step @@ -88,7 +96,7 @@ struct substring_fn { if (d_buffer) d_buffer += from_char_utf8(*itr, d_buffer); itr += step; } - return bytes; + if (!d_chars) d_offsets[idx] = bytes; } }; @@ -103,42 +111,26 @@ std::unique_ptr slice_strings( rmm::cuda_stream_view stream = rmm::cuda_stream_default, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) { - size_type strings_count = strings.size(); - if (strings_count == 0) return make_empty_strings_column(stream, mr); + if (strings.is_empty()) return make_empty_strings_column(stream, mr); if (step.is_valid()) CUDF_EXPECTS(step.value(stream) != 0, "Step parameter must not be 0"); - auto strings_column = column_device_view::create(strings.parent(), stream); - auto d_column = *strings_column; - auto d_start = get_scalar_device_view(const_cast&>(start)); - auto d_stop = get_scalar_device_view(const_cast&>(stop)); - auto d_step = get_scalar_device_view(const_cast&>(step)); - - // copy the null mask - rmm::device_buffer null_mask = cudf::detail::copy_bitmask(strings.parent(), stream, mr); - - // build offsets column - auto offsets_transformer_itr = thrust::make_transform_iterator( - thrust::make_counting_iterator(0), substring_fn{d_column, d_start, d_stop, d_step}); - auto offsets_column = make_offsets_child_column( - offsets_transformer_itr, offsets_transformer_itr + strings_count, stream, mr); - auto d_new_offsets = offsets_column->view().data(); - - // build chars column - auto bytes = cudf::detail::get_value(offsets_column->view(), strings_count, stream); - auto chars_column = strings::detail::create_chars_child_column( - strings_count, strings.null_count(), bytes, stream, mr); - auto d_chars = chars_column->mutable_view().data(); - thrust::for_each_n(rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - strings_count, - substring_fn{d_column, d_start, d_stop, d_step, d_new_offsets, d_chars}); + auto const d_column = column_device_view::create(strings.parent(), stream); + auto const d_start = get_scalar_device_view(const_cast&>(start)); + auto const d_stop = get_scalar_device_view(const_cast&>(stop)); + auto const d_step = get_scalar_device_view(const_cast&>(step)); - return make_strings_column(strings_count, - std::move(offsets_column), - std::move(chars_column), + auto children = make_strings_children(substring_fn{*d_column, d_start, d_stop, d_step}, + strings.size(), + strings.null_count(), + stream, + mr); + + return make_strings_column(strings.size(), + std::move(children.first), + std::move(children.second), strings.null_count(), - 
std::move(null_mask), + cudf::detail::copy_bitmask(strings.parent(), stream, mr), stream, mr); } @@ -166,25 +158,33 @@ namespace { * This both calculates the output size and executes the substring. */ struct substring_from_fn { - const column_device_view d_column; - const cudf::detail::input_indexalator starts; - const cudf::detail::input_indexalator stops; - const int32_t* d_offsets{}; + column_device_view const d_column; + cudf::detail::input_indexalator const starts; + cudf::detail::input_indexalator const stops; + int32_t* d_offsets{}; char* d_chars{}; - __device__ size_type operator()(size_type idx) + __device__ void operator()(size_type idx) { - if (d_column.is_null(idx)) return 0; // null string - string_view d_str = d_column.template element(idx); + if (d_column.is_null(idx)) { + if (!d_chars) d_offsets[idx] = 0; + return; + } + auto const d_str = d_column.template element(idx); auto const length = d_str.length(); auto const start = starts[idx]; - if (start >= length) return 0; // empty string + if (start >= length) { + if (!d_chars) d_offsets[idx] = 0; + return; + } auto const stop = stops[idx]; auto const end = (((stop < 0) || (stop > length)) ? length : stop); - string_view d_substr = d_str.substr(start, end - start); - if (d_chars) memcpy(d_chars + d_offsets[idx], d_substr.data(), d_substr.size_bytes()); - return d_substr.size_bytes(); + auto const d_substr = d_str.substr(start, end - start); + if (d_chars) + memcpy(d_chars + d_offsets[idx], d_substr.data(), d_substr.size_bytes()); + else + d_offsets[idx] = d_substr.size_bytes(); } }; @@ -212,32 +212,18 @@ std::unique_ptr compute_substrings_from_fn(column_device_view const& d_c auto strings_count = d_column.size(); // Copy the null mask - rmm::device_buffer null_mask{0, stream, mr}; - if (d_column.nullable()) - null_mask = rmm::device_buffer( - d_column.null_mask(), cudf::bitmask_allocation_size_bytes(strings_count), stream, mr); - - // Build offsets column - auto offsets_transformer_itr = thrust::make_transform_iterator( - thrust::make_counting_iterator(0), substring_from_fn{d_column, starts, stops}); - auto offsets_column = cudf::strings::detail::make_offsets_child_column( - offsets_transformer_itr, offsets_transformer_itr + strings_count, stream, mr); - auto d_new_offsets = offsets_column->view().data(); - - // Build chars column - auto bytes = cudf::detail::get_value(offsets_column->view(), strings_count, stream); - auto chars_column = - cudf::strings::detail::create_chars_child_column(strings_count, null_count, bytes, stream, mr); - auto chars_view = chars_column->mutable_view(); - auto d_chars = chars_view.template data(); - thrust::for_each_n(rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - strings_count, - substring_from_fn{d_column, starts, stops, d_new_offsets, d_chars}); + rmm::device_buffer null_mask = + !d_column.nullable() + ? 
rmm::device_buffer{0, stream, mr} + : rmm::device_buffer( + d_column.null_mask(), cudf::bitmask_allocation_size_bytes(strings_count), stream, mr); + + auto children = make_strings_children( + substring_from_fn{d_column, starts, stops}, strings_count, null_count, stream, mr); return make_strings_column(strings_count, - std::move(offsets_column), - std::move(chars_column), + std::move(children.first), + std::move(children.second), null_count, std::move(null_mask), stream, diff --git a/cpp/src/table/table.cpp b/cpp/src/table/table.cpp index afda6313254..4cd85fc5e7e 100644 --- a/cpp/src/table/table.cpp +++ b/cpp/src/table/table.cpp @@ -81,12 +81,4 @@ std::vector> table::release() return std::move(_columns); } -// Returns a table_view with set of specified columns -table_view table::select(std::vector const& column_indices) const -{ - std::vector columns; - for (auto index : column_indices) { columns.push_back(_columns.at(index)->view()); } - return table_view(columns); -} - } // namespace cudf diff --git a/cpp/src/table/table_view.cpp b/cpp/src/table/table_view.cpp index 9c421f6fd36..c64bf5b2823 100644 --- a/cpp/src/table/table_view.cpp +++ b/cpp/src/table/table_view.cpp @@ -63,11 +63,7 @@ template class table_view_base; // Returns a table_view with set of specified columns table_view table_view::select(std::vector const& column_indices) const { - std::vector columns(column_indices.size()); - std::transform(column_indices.begin(), column_indices.end(), columns.begin(), [this](auto index) { - return this->column(index); - }); - return table_view(columns); + return select(column_indices.begin(), column_indices.end()); } // Convert mutable view to immutable view diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 40829c74957..e95aab16098 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -14,38 +14,6 @@ # limitations under the License. 
#============================================================================= -################################################################################################### -# - common test utils ----------------------------------------------------------------------------- - -find_package(Threads REQUIRED) - -add_library(cudftestutil STATIC - utilities/base_fixture.cpp - utilities/column_utilities.cu - utilities/table_utilities.cu - strings/utilities.cu) - -target_compile_options(cudftestutil - PUBLIC "$<$:${CUDF_CXX_FLAGS}>" - "$<$:${CUDF_CUDA_FLAGS}>" -) - -target_compile_features(cudftestutil PUBLIC cxx_std_14 cuda_std_14) - -target_link_libraries(cudftestutil - PUBLIC GTest::gmock - GTest::gtest - Threads::Threads - cudf) - -target_include_directories(cudftestutil - PUBLIC "$" - "$") - -install(TARGETS cudftestutil - DESTINATION lib - EXPORT cudf-targets) - ################################################################################################### # - compiler function ----------------------------------------------------------------------------- @@ -319,7 +287,6 @@ ConfigureTest(SEARCH_TEST search/search_test.cpp) # - reshape test ---------------------------------------------------------------------------------- ConfigureTest(RESHAPE_TEST reshape/byte_cast_tests.cpp - reshape/explode_tests.cpp reshape/interleave_columns_tests.cpp reshape/tile_tests.cpp) @@ -422,6 +389,7 @@ ConfigureTest(AST_TEST ast/transform_tests.cpp) ConfigureTest(LISTS_TEST lists/contains_tests.cpp lists/count_elements_tests.cpp + lists/explode_tests.cpp lists/drop_list_duplicates_tests.cpp lists/extract_tests.cpp lists/sort_lists_tests.cpp) diff --git a/cpp/tests/lists/explode_tests.cpp b/cpp/tests/lists/explode_tests.cpp new file mode 100644 index 00000000000..2ec9294d118 --- /dev/null +++ b/cpp/tests/lists/explode_tests.cpp @@ -0,0 +1,819 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include + +#include +#include + +using namespace cudf::test; +using FCW = fixed_width_column_wrapper; +using LCW = lists_column_wrapper; + +class ExplodeTest : public cudf::test::BaseFixture { +}; + +class ExplodeOuterTest : public cudf::test::BaseFixture { +}; + +template +class ExplodeTypedTest : public cudf::test::BaseFixture { +}; + +template +class ExplodeOuterTypedTest : public cudf::test::BaseFixture { +}; + +TYPED_TEST_CASE(ExplodeTypedTest, cudf::test::FixedPointTypes); + +TYPED_TEST_CASE(ExplodeOuterTypedTest, cudf::test::FixedPointTypes); + +TEST_F(ExplodeTest, Empty) +{ + cudf::table_view t({LCW{}, FCW{}}); + + auto ret = cudf::explode(t, 0); + + cudf::table_view expected({FCW{}, FCW{}}); + + CUDF_TEST_EXPECT_TABLES_EQUAL(ret->view(), expected); + + auto pos_ret = cudf::explode_position(t, 0); + + cudf::table_view pos_expected({FCW{}, FCW{}, FCW{}}); + + CUDF_TEST_EXPECT_TABLES_EQUAL(pos_ret->view(), pos_expected); +} + +TEST_F(ExplodeTest, NonList) +{ + cudf::table_view t({FCW{100, 200, 300}, FCW{100, 200, 300}}); + + EXPECT_THROW(cudf::explode(t, 1), cudf::logic_error); + EXPECT_THROW(cudf::explode_position(t, 1), cudf::logic_error); +} + +TEST_F(ExplodeTest, Basics) +{ + // a b c + // 100 [1, 2, 7] string0 + // 200 [5, 6] string1 + // 300 [0, 3] string2 + + FCW a{100, 200, 300}; + LCW b{LCW{1, 2, 7}, LCW{5, 6}, LCW{0, 3}}; + strings_column_wrapper c{"string0", "string1", "string2"}; + + FCW expected_a{100, 100, 100, 200, 200, 300, 300}; + FCW expected_b{1, 2, 7, 5, 6, 0, 3}; + strings_column_wrapper expected_c{ + "string0", "string0", "string0", "string1", "string1", "string2", "string2"}; + + cudf::table_view t({a, b, c}); + cudf::table_view expected({expected_a, expected_b, expected_c}); + + auto ret = cudf::explode(t, 1); + + CUDF_TEST_EXPECT_TABLES_EQUAL(ret->view(), expected); + + FCW expected_pos_col{0, 1, 2, 0, 1, 0, 1}; + cudf::table_view pos_expected({expected_a, expected_pos_col, expected_b, expected_c}); + + auto pos_ret = cudf::explode_position(t, 1); + CUDF_TEST_EXPECT_TABLES_EQUAL(pos_ret->view(), pos_expected); +} + +TEST_F(ExplodeTest, SingleNull) +{ + // a b + // [1, 2, 7] 100 + // [5, 6] 200 + // [] 300 + // [0, 3] 400 + + auto first_invalid = + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i == 0 ? false : true; }); + + LCW a({LCW{1, 2, 7}, LCW{5, 6}, LCW{}, LCW{0, 3}}, first_invalid); + FCW b({100, 200, 300, 400}); + + FCW expected_a{5, 6, 0, 3}; + FCW expected_b{200, 200, 400, 400}; + + cudf::table_view t({a, b}); + cudf::table_view expected({expected_a, expected_b}); + + auto ret = cudf::explode(t, 0); + + CUDF_TEST_EXPECT_TABLES_EQUAL(ret->view(), expected); + + FCW expected_pos_col{0, 1, 0, 1}; + cudf::table_view pos_expected({expected_pos_col, expected_a, expected_b}); + + auto pos_ret = cudf::explode_position(t, 0); + CUDF_TEST_EXPECT_TABLES_EQUAL(pos_ret->view(), pos_expected); +} + +TEST_F(ExplodeTest, Nulls) +{ + // a b + // [1, 2, 7] 100 + // [5, 6] 200 + // [0, 3] 300 + + auto valids = cudf::detail::make_counting_transform_iterator( + 0, [](auto i) { return i % 2 == 0 ? 
true : false; }); + auto always_valid = + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return true; }); + + LCW a({LCW{1, 2, 7}, LCW{5, 6}, LCW{0, 3}}, valids); + FCW b({100, 200, 300}, valids); + + FCW expected_a({1, 2, 7, 0, 3}); + FCW expected_b({100, 100, 100, 300, 300}, always_valid); + + cudf::table_view t({a, b}); + cudf::table_view expected({expected_a, expected_b}); + + auto ret = cudf::explode(t, 0); + + CUDF_TEST_EXPECT_TABLES_EQUAL(ret->view(), expected); + + FCW expected_pos_col{0, 1, 2, 0, 1}; + cudf::table_view pos_expected({expected_pos_col, expected_a, expected_b}); + + auto pos_ret = cudf::explode_position(t, 0); + CUDF_TEST_EXPECT_TABLES_EQUAL(pos_ret->view(), pos_expected); +} + +TEST_F(ExplodeTest, NullsInList) +{ + // a b + // [1, 2, 7] 100 + // [5, 6, 0, 9] 200 + // [] 300 + // [0, 3, 8] 400 + + auto valids = cudf::detail::make_counting_transform_iterator( + 0, [](auto i) { return i % 2 == 0 ? true : false; }); + + LCW a{LCW({1, 2, 7}, valids), LCW({5, 6, 0, 9}, valids), LCW{}, LCW({0, 3, 8}, valids)}; + FCW b{100, 200, 300, 400}; + + FCW expected_a({1, 2, 7, 5, 6, 0, 9, 0, 3, 8}, {1, 0, 1, 1, 0, 1, 0, 1, 0, 1}); + FCW expected_b{100, 100, 100, 200, 200, 200, 200, 400, 400, 400}; + + cudf::table_view t({a, b}); + cudf::table_view expected({expected_a, expected_b}); + + auto ret = cudf::explode(t, 0); + + CUDF_TEST_EXPECT_TABLES_EQUAL(ret->view(), expected); + + FCW expected_pos_col{0, 1, 2, 0, 1, 2, 3, 0, 1, 2}; + cudf::table_view pos_expected({expected_pos_col, expected_a, expected_b}); + + auto pos_ret = cudf::explode_position(t, 0); + CUDF_TEST_EXPECT_TABLES_EQUAL(pos_ret->view(), pos_expected); +} + +TEST_F(ExplodeTest, Nested) +{ + // a b + // [[1, 2], [7, 6, 5]] 100 + // [[5, 6]] 200 + // [[0, 3],[],[5],[2, 1]] 300 + + LCW a{LCW{LCW{1, 2}, LCW{7, 6, 5}}, LCW{LCW{5, 6}}, LCW{LCW{0, 3}, LCW{}, LCW{5}, LCW{2, 1}}}; + FCW b{100, 200, 300}; + + LCW expected_a{LCW{1, 2}, LCW{7, 6, 5}, LCW{5, 6}, LCW{0, 3}, LCW{}, LCW{5}, LCW{2, 1}}; + FCW expected_b{100, 100, 200, 300, 300, 300, 300}; + + cudf::table_view t({a, b}); + cudf::table_view expected({expected_a, expected_b}); + + auto ret = cudf::explode(t, 0); + + CUDF_TEST_EXPECT_TABLES_EQUAL(ret->view(), expected); + + FCW expected_pos_col{0, 1, 0, 0, 1, 2, 3}; + cudf::table_view pos_expected({expected_pos_col, expected_a, expected_b}); + + auto pos_ret = cudf::explode_position(t, 0); + CUDF_TEST_EXPECT_TABLES_EQUAL(pos_ret->view(), pos_expected); +} + +TEST_F(ExplodeTest, NestedNulls) +{ + // a b + // [[1, 2], [7, 6, 5]] 100 + // [[5, 6]] 200 + // [[0, 3],[5],[2, 1]] 300 + + auto valids = cudf::detail::make_counting_transform_iterator( + 0, [](auto i) { return i % 2 == 0 ? 
true : false; }); + auto always_valid = + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return true; }); + + LCW a({LCW{LCW{1, 2}, LCW{7, 6, 5}}, LCW{LCW{5, 6}}, LCW{LCW{0, 3}, LCW{5}, LCW{2, 1}}}, valids); + FCW b({100, 200, 300}, valids); + + LCW expected_a{LCW{1, 2}, LCW{7, 6, 5}, LCW{0, 3}, LCW{5}, LCW{2, 1}}; + FCW expected_b({100, 100, 300, 300, 300}, always_valid); + + cudf::table_view t({a, b}); + cudf::table_view expected({expected_a, expected_b}); + + auto ret = cudf::explode(t, 0); + + CUDF_TEST_EXPECT_TABLES_EQUAL(ret->view(), expected); + + FCW expected_pos_col{0, 1, 0, 1, 2}; + cudf::table_view pos_expected({expected_pos_col, expected_a, expected_b}); + + auto pos_ret = cudf::explode_position(t, 0); + CUDF_TEST_EXPECT_TABLES_EQUAL(pos_ret->view(), pos_expected); +} + +TEST_F(ExplodeTest, NullsInNested) +{ + // a b + // [[1, 2], [7, 6, 5]] 100 + // [[5, 6]] 200 + // [[0, 3],[5],[2, 1]] 300 + + auto valids = cudf::detail::make_counting_transform_iterator( + 0, [](auto i) { return i % 2 == 0 ? true : false; }); + + LCW a({LCW{LCW({1, 2}, valids), LCW{7, 6, 5}}, + LCW{LCW{5, 6}}, + LCW{LCW{0, 3}, LCW{5}, LCW({2, 1}, valids)}}); + FCW b({100, 200, 300}); + + LCW expected_a{ + LCW({1, 2}, valids), LCW{7, 6, 5}, LCW{5, 6}, LCW{0, 3}, LCW{5}, LCW({2, 1}, valids)}; + FCW expected_b{100, 100, 200, 300, 300, 300}; + + cudf::table_view t({a, b}); + cudf::table_view expected({expected_a, expected_b}); + + auto ret = cudf::explode(t, 0); + + CUDF_TEST_EXPECT_TABLES_EQUAL(ret->view(), expected); + + FCW expected_pos_col{0, 1, 0, 0, 1, 2}; + cudf::table_view pos_expected({expected_pos_col, expected_a, expected_b}); + + auto pos_ret = cudf::explode_position(t, 0); + CUDF_TEST_EXPECT_TABLES_EQUAL(pos_ret->view(), pos_expected); +} + +TEST_F(ExplodeTest, NullsInNestedDoubleExplode) +{ + // a b + // [[1, 2], [], [7, 6, 5]] 100 + // [[5, 6]] 200 + // [[0, 3],[5],[2, 1]] 300 + + auto valids = cudf::detail::make_counting_transform_iterator( + 0, [](auto i) { return i % 2 == 0 ? true : false; }); + + LCW a{LCW{LCW({1, 2}, valids), LCW{}, LCW{7, 6, 5}}, + LCW{LCW{5, 6}}, + LCW{LCW{0, 3}, LCW{5}, LCW({2, 1}, valids)}}; + FCW b{100, 200, 300}; + + FCW expected_a({1, 2, 7, 6, 5, 5, 6, 0, 3, 5, 2, 1}, {1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0}); + FCW expected_b{100, 100, 100, 100, 100, 200, 200, 300, 300, 300, 300, 300}; + + cudf::table_view t({a, b}); + cudf::table_view expected({expected_a, expected_b}); + + auto first_explode_ret = cudf::explode(t, 0); + auto ret = cudf::explode(first_explode_ret->view(), 0); + + CUDF_TEST_EXPECT_TABLES_EQUAL(ret->view(), expected); + + FCW expected_pos_col{0, 1, 0, 1, 2, 0, 1, 0, 1, 0, 0, 1}; + cudf::table_view pos_expected({expected_pos_col, expected_a, expected_b}); + + auto pos_ret = cudf::explode_position(first_explode_ret->view(), 0); + CUDF_TEST_EXPECT_TABLES_EQUAL(pos_ret->view(), pos_expected); +} + +TEST_F(ExplodeTest, NestedStructs) +{ + // a b + // [[1, 2], [7, 6, 5]] {100, "100"} + // [[5, 6]] {200, "200"} + // [[0, 3],[5],[2, 1]] {300, "300"} + + auto valids = cudf::detail::make_counting_transform_iterator( + 0, [](auto i) { return i % 2 == 0 ? 
true : false; }); + + LCW a({LCW{LCW({1, 2}, valids), LCW{7, 6, 5}}, + LCW{LCW{5, 6}}, + LCW{LCW{0, 3}, LCW{5}, LCW({2, 1}, valids)}}); + FCW b1({100, 200, 300}); + strings_column_wrapper b2{"100", "200", "300"}; + structs_column_wrapper b({b1, b2}); + + LCW expected_a{ + LCW({1, 2}, valids), LCW{7, 6, 5}, LCW{5, 6}, LCW{0, 3}, LCW{5}, LCW({2, 1}, valids)}; + FCW expected_b1{100, 100, 200, 300, 300, 300}; + strings_column_wrapper expected_b2{"100", "100", "200", "300", "300", "300"}; + structs_column_wrapper expected_b({expected_b1, expected_b2}); + + cudf::table_view t({a, b}); + cudf::table_view expected({expected_a, expected_b}); + + auto ret = cudf::explode(t, 0); + + CUDF_TEST_EXPECT_TABLES_EQUAL(ret->view(), expected); + + FCW expected_pos_col{0, 1, 0, 0, 1, 2}; + cudf::table_view pos_expected({expected_pos_col, expected_a, expected_b}); + + auto pos_ret = cudf::explode_position(t, 0); + CUDF_TEST_EXPECT_TABLES_EQUAL(pos_ret->view(), pos_expected); +} + +TYPED_TEST(ExplodeTypedTest, ListOfStructs) +{ + // a b + // [{70, "70"}, {75, "75"}] 100 + // [{50, "50"}, {55, "55"}] 200 + // [{35, "35"}, {45, "45"}] 300 + // [{25, "25"}, {30, "30"}] 400 + // [{15, "15"}, {20, "20"}] 500 + + auto numeric_col = + fixed_width_column_wrapper{{70, 75, 50, 55, 35, 45, 25, 30, 15, 20}}; + strings_column_wrapper string_col{"70", "75", "50", "55", "35", "45", "25", "30", "15", "20"}; + auto struct_col = structs_column_wrapper{{numeric_col, string_col}}.release(); + auto a = cudf::make_lists_column( + 5, FCW{0, 2, 4, 6, 8, 10}.release(), std::move(struct_col), cudf::UNKNOWN_NULL_COUNT, {}); + + FCW b{100, 200, 300, 400, 500}; + + cudf::table_view t({a->view(), b}); + auto ret = cudf::explode(t, 0); + + auto expected_numeric_col = + fixed_width_column_wrapper{{70, 75, 50, 55, 35, 45, 25, 30, 15, 20}}; + strings_column_wrapper expected_string_col{ + "70", "75", "50", "55", "35", "45", "25", "30", "15", "20"}; + + auto expected_a = structs_column_wrapper{{expected_numeric_col, expected_string_col}}.release(); + FCW expected_b{100, 100, 200, 200, 300, 300, 400, 400, 500, 500}; + + cudf::table_view expected({expected_a->view(), expected_b}); + + CUDF_TEST_EXPECT_TABLES_EQUAL(ret->view(), expected); + + FCW expected_pos_col{0, 1, 0, 1, 0, 1, 0, 1, 0, 1}; + cudf::table_view pos_expected({expected_pos_col, expected_a->view(), expected_b}); + + auto pos_ret = cudf::explode_position(t, 0); + CUDF_TEST_EXPECT_TABLES_EQUAL(pos_ret->view(), pos_expected); +} + +TEST_F(ExplodeTest, SlicedList) +{ + // a b + // [[1, 2],[7, 6, 5]] 100 + // [[5, 6]] 200 + // [[0, 3],[5],[2, 1]] 300 + // [[8, 3],[],[4, 3, 1, 2]] 400 + // [[2, 3, 4],[9, 8]] 500 + + // slicing the top 2 rows and the bottom row off + + auto valids = cudf::detail::make_counting_transform_iterator( + 0, [](auto i) { return i % 2 == 0 ? 
true : false; }); + + LCW a({LCW{LCW({1, 2}, valids), LCW{7, 6, 5}}, + LCW{LCW{5, 6}}, + LCW{LCW{0, 3}, LCW{5}, LCW({2, 1}, valids)}, + LCW{LCW{8, 3}, LCW{}, LCW({4, 3, 1, 2}, valids)}, + LCW{LCW{2, 3, 4}, LCW{9, 8}}}); + FCW b({100, 200, 300, 400, 500}); + + LCW expected_a{ + LCW{0, 3}, LCW{5}, LCW({2, 1}, valids), LCW{8, 3}, LCW{}, LCW({4, 3, 1, 2}, valids)}; + FCW expected_b{300, 300, 300, 400, 400, 400}; + + cudf::table_view t({a, b}); + auto sliced_t = cudf::slice(t, {2, 4}); + cudf::table_view expected({expected_a, expected_b}); + + auto ret = cudf::explode(sliced_t[0], 0); + + CUDF_TEST_EXPECT_TABLES_EQUAL(ret->view(), expected); + + FCW expected_pos_col{0, 1, 2, 0, 1, 2}; + cudf::table_view pos_expected({expected_pos_col, expected_a, expected_b}); + + auto pos_ret = cudf::explode_position(sliced_t[0], 0); + CUDF_TEST_EXPECT_TABLES_EQUAL(pos_ret->view(), pos_expected); +} + +TEST_F(ExplodeOuterTest, Empty) +{ + LCW a{}; + FCW b{}; + + cudf::table_view t({LCW{}, FCW{}}); + + auto ret = cudf::explode_outer(t, 0); + + FCW expected_a{}; + FCW expected_b{}; + cudf::table_view expected({FCW{}, FCW{}}); + + CUDF_TEST_EXPECT_TABLES_EQUAL(ret->view(), expected); +} + +TEST_F(ExplodeOuterTest, NonList) +{ + cudf::table_view t({FCW{100, 200, 300}, FCW{100, 200, 300}}); + + EXPECT_THROW(cudf::explode_outer(t, 1), cudf::logic_error); + EXPECT_THROW(cudf::explode_outer_position(t, 1), cudf::logic_error); +} + +TEST_F(ExplodeOuterTest, Basics) +{ + // a b c + // 100 [1, 2, 7] string0 + // 200 [5, 6] string1 + // 300 [0, 3] string2 + + FCW a{100, 200, 300}; + LCW b{LCW{1, 2, 7}, LCW{5, 6}, LCW{0, 3}}; + strings_column_wrapper c{"string0", "string1", "string2"}; + + FCW expected_a{100, 100, 100, 200, 200, 300, 300}; + FCW expected_b{1, 2, 7, 5, 6, 0, 3}; + strings_column_wrapper expected_c{ + "string0", "string0", "string0", "string1", "string1", "string2", "string2"}; + + cudf::table_view t({a, b, c}); + cudf::table_view expected({expected_a, expected_b, expected_c}); + + auto ret = cudf::explode_outer(t, 1); + CUDF_TEST_EXPECT_TABLES_EQUAL(ret->view(), expected); + + FCW expected_pos_col{0, 1, 2, 0, 1, 0, 1}; + cudf::table_view pos_expected({expected_a, expected_pos_col, expected_b, expected_c}); + + auto pos_ret = cudf::explode_outer_position(t, 1); + CUDF_TEST_EXPECT_TABLES_EQUAL(pos_ret->view(), pos_expected); +} + +TEST_F(ExplodeOuterTest, SingleNull) +{ + // a b + // [1, 2, 7] 100 + // [5, 6] 200 + // [] 300 + // [0, 3] 400 + + auto first_invalid = + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i == 0 ? false : true; }); + + LCW a({LCW{1, 2, 7}, LCW{5, 6}, LCW{}, LCW{0, 3}}, first_invalid); + FCW b({100, 200, 300, 400}); + + FCW expected_a{{0, 5, 6, 0, 0, 3}, {0, 1, 1, 0, 1, 1}}; + FCW expected_b{100, 200, 200, 300, 400, 400}; + + cudf::table_view t({a, b}); + cudf::table_view expected({expected_a, expected_b}); + + auto ret = cudf::explode_outer(t, 0); + CUDF_TEST_EXPECT_TABLES_EQUAL(ret->view(), expected); + + FCW expected_pos_col{0, 0, 1, 0, 0, 1}; + cudf::table_view pos_expected({expected_pos_col, expected_a, expected_b}); + + auto pos_ret = cudf::explode_outer_position(t, 0); + CUDF_TEST_EXPECT_TABLES_EQUAL(pos_ret->view(), pos_expected); +} + +TEST_F(ExplodeOuterTest, Nulls) +{ + // a b + // [1, 2, 7] 100 + // [5, 6] 200 + // [0, 3] 300 + + auto valids = cudf::detail::make_counting_transform_iterator( + 0, [](auto i) { return i % 2 == 0 ? 
true : false; }); + + LCW a({LCW{1, 2, 7}, LCW{5, 6}, LCW{0, 3}}, valids); + FCW b({100, 200, 300}, valids); + + FCW expected_a({1, 2, 7, 0, 0, 3}, {1, 1, 1, 0, 1, 1}); + FCW expected_b({100, 100, 100, 200, 300, 300}, {1, 1, 1, 0, 1, 1}); + + cudf::table_view t({a, b}); + cudf::table_view expected({expected_a, expected_b}); + + auto ret = cudf::explode_outer(t, 0); + CUDF_TEST_EXPECT_TABLES_EQUAL(ret->view(), expected); + + FCW expected_pos_col{0, 1, 2, 0, 0, 1}; + cudf::table_view pos_expected({expected_pos_col, expected_a, expected_b}); + + auto pos_ret = cudf::explode_outer_position(t, 0); + CUDF_TEST_EXPECT_TABLES_EQUAL(pos_ret->view(), pos_expected); +} + +TEST_F(ExplodeOuterTest, NullsInList) +{ + // a b + // [1, 2, 7] 100 + // [5, 6, 0, 9] 200 + // [] 300 + // [0, 3, 8] 400 + + auto valids = cudf::detail::make_counting_transform_iterator( + 0, [](auto i) { return i % 2 == 0 ? true : false; }); + + LCW a{LCW({1, 2, 7}, valids), LCW({5, 6, 0, 9}, valids), LCW{}, LCW({0, 3, 8}, valids)}; + FCW b{100, 200, 300, 400}; + + FCW expected_a({1, 2, 7, 5, 6, 0, 9, 0, 0, 3, 8}, {1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1}); + FCW expected_b{100, 100, 100, 200, 200, 200, 200, 300, 400, 400, 400}; + + cudf::table_view t({a, b}); + cudf::table_view expected({expected_a, expected_b}); + + auto ret = cudf::explode_outer(t, 0); + + CUDF_TEST_EXPECT_TABLES_EQUAL(ret->view(), expected); + + FCW expected_pos_col{0, 1, 2, 0, 1, 2, 3, 0, 0, 1, 2}; + cudf::table_view pos_expected({expected_pos_col, expected_a, expected_b}); + + auto pos_ret = cudf::explode_outer_position(t, 0); + CUDF_TEST_EXPECT_TABLES_EQUAL(pos_ret->view(), pos_expected); +} + +TEST_F(ExplodeOuterTest, Nested) +{ + // a b + // [[1, 2], [7, 6, 5]] 100 + // [[5, 6]] 200 + // [[0, 3],[],[5],[2, 1]] 300 + + LCW a{LCW{LCW{1, 2}, LCW{7, 6, 5}}, LCW{LCW{5, 6}}, LCW{LCW{0, 3}, LCW{}, LCW{5}, LCW{2, 1}}}; + FCW b{100, 200, 300}; + + LCW expected_a{LCW{1, 2}, LCW{7, 6, 5}, LCW{5, 6}, LCW{0, 3}, LCW{}, LCW{5}, LCW{2, 1}}; + FCW expected_b{100, 100, 200, 300, 300, 300, 300}; + + cudf::table_view t({a, b}); + cudf::table_view expected({expected_a, expected_b}); + + auto ret = cudf::explode_outer(t, 0); + + CUDF_TEST_EXPECT_TABLES_EQUAL(ret->view(), expected); + + FCW expected_pos_col{0, 1, 0, 0, 1, 2, 3}; + cudf::table_view pos_expected({expected_pos_col, expected_a, expected_b}); + + auto pos_ret = cudf::explode_outer_position(t, 0); + CUDF_TEST_EXPECT_TABLES_EQUAL(pos_ret->view(), pos_expected); +} + +TEST_F(ExplodeOuterTest, NestedNulls) +{ + // a b + // [[1, 2], [7, 6, 5]] 100 + // [[5, 6]] 200 + // [[0, 3],[5],[2, 1]] 300 + + auto valids = cudf::detail::make_counting_transform_iterator( + 0, [](auto i) { return i % 2 == 0 ? true : false; }); + + LCW a({LCW{LCW{1, 2}, LCW{7, 6, 5}}, LCW{LCW{5, 6}}, LCW{LCW{0, 3}, LCW{5}, LCW{2, 1}}}, valids); + FCW b({100, 200, 300}); + + auto expected_valids = + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i == 2 ? 
false : true; }); + LCW expected_a({LCW{1, 2}, LCW{7, 6, 5}, LCW{}, LCW{0, 3}, LCW{5}, LCW{2, 1}}, expected_valids); + FCW expected_b({100, 100, 200, 300, 300, 300}); + cudf::table_view t({a, b}); + cudf::table_view expected({expected_a, expected_b}); + + auto ret = cudf::explode_outer(t, 0); + CUDF_TEST_EXPECT_TABLES_EQUAL(ret->view(), expected); + + FCW expected_pos_col{0, 1, 0, 0, 1, 2}; + cudf::table_view pos_expected({expected_pos_col, expected_a, expected_b}); + + auto pos_ret = cudf::explode_outer_position(t, 0); + CUDF_TEST_EXPECT_TABLES_EQUAL(pos_ret->view(), pos_expected); +} + +TEST_F(ExplodeOuterTest, NullsInNested) +{ + // a b + // [[1, 2], [7, 6, 5]] 100 + // [[5, 6]] 200 + // [[0, 3],[5],[2, 1]] 300 + + auto valids = cudf::detail::make_counting_transform_iterator( + 0, [](auto i) { return i % 2 == 0 ? true : false; }); + + LCW a({LCW{LCW({1, 2}, valids), LCW{7, 6, 5}}, + LCW{LCW{5, 6}}, + LCW{LCW{0, 3}, LCW{5}, LCW({2, 1}, valids)}}); + FCW b({100, 200, 300}); + + LCW expected_a{ + LCW({1, 2}, valids), LCW{7, 6, 5}, LCW{5, 6}, LCW{0, 3}, LCW{5}, LCW({2, 1}, valids)}; + FCW expected_b{100, 100, 200, 300, 300, 300}; + + cudf::table_view t({a, b}); + cudf::table_view expected({expected_a, expected_b}); + + auto ret = cudf::explode_outer(t, 0); + + CUDF_TEST_EXPECT_TABLES_EQUAL(ret->view(), expected); + + FCW expected_pos_col{0, 1, 0, 0, 1, 2}; + cudf::table_view pos_expected({expected_pos_col, expected_a, expected_b}); + + auto pos_ret = cudf::explode_outer_position(t, 0); + CUDF_TEST_EXPECT_TABLES_EQUAL(pos_ret->view(), pos_expected); +} + +TEST_F(ExplodeOuterTest, NullsInNestedDoubleExplode) +{ + // a b + // [[1, 2], [], [7, 6, 5]] 100 + // [[5, 6]] 200 + // [[0, 3],[5],[2, 1]] 300 + + auto valids = cudf::detail::make_counting_transform_iterator( + 0, [](auto i) { return i % 2 == 0 ? true : false; }); + + LCW a{LCW{LCW({1, 2}, valids), LCW{}, LCW{7, 6, 5}}, + LCW{LCW{5, 6}}, + LCW{LCW{0, 3}, LCW{5}, LCW({2, 1}, valids)}}; + FCW b{100, 200, 300}; + + FCW expected_a({1, 2, 0, 7, 6, 5, 5, 6, 0, 3, 5, 2, 1}, {1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0}); + FCW expected_b{100, 100, 100, 100, 100, 100, 200, 200, 300, 300, 300, 300, 300}; + + cudf::table_view t({a, b}); + cudf::table_view expected({expected_a, expected_b}); + + auto first_explode_ret = cudf::explode_outer(t, 0); + auto ret = cudf::explode_outer(first_explode_ret->view(), 0); + + CUDF_TEST_EXPECT_TABLES_EQUAL(ret->view(), expected); + + FCW expected_pos_col{0, 1, 0, 0, 1, 2, 0, 1, 0, 1, 0, 0, 1}; + cudf::table_view pos_expected({expected_pos_col, expected_a, expected_b}); + + auto pos_ret = cudf::explode_outer_position(first_explode_ret->view(), 0); + CUDF_TEST_EXPECT_TABLES_EQUAL(pos_ret->view(), pos_expected); +} + +TEST_F(ExplodeOuterTest, NestedStructs) +{ + // a b + // [[1, 2], [7, 6, 5]] {100, "100"} + // [[5, 6]] {200, "200"} + // [[0, 3],[5],[2, 1]] {300, "300"} + + auto valids = cudf::detail::make_counting_transform_iterator( + 0, [](auto i) { return i % 2 == 0 ? 
true : false; }); + + LCW a({LCW{LCW({1, 2}, valids), LCW{7, 6, 5}}, + LCW{LCW{5, 6}}, + LCW{LCW{0, 3}, LCW{5}, LCW({2, 1}, valids)}}); + FCW b1({100, 200, 300}); + strings_column_wrapper b2{"100", "200", "300"}; + structs_column_wrapper b({b1, b2}); + + LCW expected_a{ + LCW({1, 2}, valids), LCW{7, 6, 5}, LCW{5, 6}, LCW{0, 3}, LCW{5}, LCW({2, 1}, valids)}; + FCW expected_b1{100, 100, 200, 300, 300, 300}; + strings_column_wrapper expected_b2{"100", "100", "200", "300", "300", "300"}; + structs_column_wrapper expected_b({expected_b1, expected_b2}); + + cudf::table_view t({a, b}); + cudf::table_view expected({expected_a, expected_b}); + + auto ret = cudf::explode_outer(t, 0); + + CUDF_TEST_EXPECT_TABLES_EQUAL(ret->view(), expected); + + FCW expected_pos_col{0, 1, 0, 0, 1, 2}; + cudf::table_view pos_expected({expected_pos_col, expected_a, expected_b}); + + auto pos_ret = cudf::explode_outer_position(t, 0); + CUDF_TEST_EXPECT_TABLES_EQUAL(pos_ret->view(), pos_expected); +} + +TYPED_TEST(ExplodeOuterTypedTest, ListOfStructs) +{ + // a b + // [{70, "70"}, {75, "75"}] 100 + // [{50, "50"}, {55, "55"}] 200 + // [{35, "35"}, {45, "45"}] 300 + // [{25, "25"}, {30, "30"}] 400 + // [{15, "15"}, {20, "20"}] 500 + + auto numeric_col = + fixed_width_column_wrapper{{70, 75, 50, 55, 35, 45, 25, 30, 15, 20}}; + strings_column_wrapper string_col{"70", "75", "50", "55", "35", "45", "25", "30", "15", "20"}; + auto struct_col = structs_column_wrapper{{numeric_col, string_col}}.release(); + auto a = cudf::make_lists_column( + 5, FCW{0, 2, 4, 6, 8, 10}.release(), std::move(struct_col), cudf::UNKNOWN_NULL_COUNT, {}); + + FCW b{100, 200, 300, 400, 500}; + + cudf::table_view t({a->view(), b}); + auto ret = cudf::explode_outer(t, 0); + + auto expected_numeric_col = + fixed_width_column_wrapper{{70, 75, 50, 55, 35, 45, 25, 30, 15, 20}}; + strings_column_wrapper expected_string_col{ + "70", "75", "50", "55", "35", "45", "25", "30", "15", "20"}; + + auto expected_a = structs_column_wrapper{{expected_numeric_col, expected_string_col}}.release(); + FCW expected_b{100, 100, 200, 200, 300, 300, 400, 400, 500, 500}; + + cudf::table_view expected({expected_a->view(), expected_b}); + + CUDF_TEST_EXPECT_TABLES_EQUAL(ret->view(), expected); + + FCW expected_pos_col{0, 1, 0, 1, 0, 1, 0, 1, 0, 1}; + cudf::table_view pos_expected({expected_pos_col, expected_a->view(), expected_b}); + + auto pos_ret = cudf::explode_outer_position(t, 0); + CUDF_TEST_EXPECT_TABLES_EQUAL(pos_ret->view(), pos_expected); +} + +TEST_F(ExplodeOuterTest, SlicedList) +{ + // a b + // [[1, 2],[7, 6, 5]] 100 + // [[5, 6]] 200 + // [[0, 3],[5],[2, 1]] 300 + // [[8, 3],[],[4, 3, 1, 2]] 400 + // [[2, 3, 4],[9, 8]] 500 + + // slicing the top 2 rows and the bottom row off + + auto valids = cudf::detail::make_counting_transform_iterator( + 0, [](auto i) { return i % 2 == 0 ? 
true : false; }); + + LCW a({LCW{LCW({1, 2}, valids), LCW{7, 6, 5}}, + LCW{LCW{5, 6}}, + LCW{LCW{0, 3}, LCW{5}, LCW({2, 1}, valids)}, + LCW{LCW{8, 3}, LCW{}, LCW({4, 3, 1, 2}, valids)}, + LCW{LCW{2, 3, 4}, LCW{9, 8}}}); + FCW b({100, 200, 300, 400, 500}); + + LCW expected_a{ + LCW{0, 3}, LCW{5}, LCW({2, 1}, valids), LCW{8, 3}, LCW{}, LCW({4, 3, 1, 2}, valids)}; + FCW expected_b{300, 300, 300, 400, 400, 400}; + + cudf::table_view t({a, b}); + auto sliced_t = cudf::slice(t, {2, 4}); + cudf::table_view expected({expected_a, expected_b}); + + auto ret = cudf::explode_outer(sliced_t[0], 0); + + CUDF_TEST_EXPECT_TABLES_EQUAL(ret->view(), expected); + + FCW expected_pos_col{0, 1, 2, 0, 1, 2}; + cudf::table_view pos_expected({expected_pos_col, expected_a, expected_b}); + + auto pos_ret = cudf::explode_outer_position(sliced_t[0], 0); + CUDF_TEST_EXPECT_TABLES_EQUAL(pos_ret->view(), pos_expected); +} diff --git a/cpp/tests/merge/merge_test.cpp b/cpp/tests/merge/merge_test.cpp index fa3bde8cb52..451fa82d5a3 100644 --- a/cpp/tests/merge/merge_test.cpp +++ b/cpp/tests/merge/merge_test.cpp @@ -729,4 +729,36 @@ TEST_F(MergeTest, KeysWithNulls) } } +template +struct FixedPointTestBothReps : public cudf::test::BaseFixture { +}; + +template +using fp_wrapper = cudf::test::fixed_point_column_wrapper; + +TYPED_TEST_CASE(FixedPointTestBothReps, cudf::test::FixedPointTypes); + +TYPED_TEST(FixedPointTestBothReps, FixedPointMerge) +{ + using namespace numeric; + using decimalXX = TypeParam; + using RepType = cudf::device_storage_type_t; + + auto const a = fp_wrapper{{4, 22, 33, 44, 55}, scale_type{-1}}; + auto const b = fp_wrapper{{5, 7, 10}, scale_type{-1}}; + auto const table_a = cudf::table_view(std::vector{a}); + auto const table_b = cudf::table_view(std::vector{b}); + auto const tables = std::vector{table_a, table_b}; + + auto const key_cols = std::vector{0}; + auto const order = std::vector{cudf::order::ASCENDING}; + + auto const exp = fp_wrapper{{4, 5, 7, 10, 22, 33, 44, 55}, scale_type{-1}}; + auto const exp_table = cudf::table_view(std::vector{exp}); + + auto const result = cudf::merge(tables, key_cols, order); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(exp_table.column(0), result->view().column(0)); +} + CUDF_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/reshape/explode_tests.cpp b/cpp/tests/reshape/explode_tests.cpp deleted file mode 100644 index 5f3237ce46d..00000000000 --- a/cpp/tests/reshape/explode_tests.cpp +++ /dev/null @@ -1,530 +0,0 @@ -/* - * Copyright (c) 2021, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include - -#include -#include -#include -#include - -using namespace cudf::test; - -class ExplodeTest : public cudf::test::BaseFixture { -}; - -template -class ExplodeTypedTest : public cudf::test::BaseFixture { -}; - -TYPED_TEST_CASE(ExplodeTypedTest, cudf::test::FixedPointTypes); - -TEST_F(ExplodeTest, Empty) -{ - lists_column_wrapper a{}; - fixed_width_column_wrapper b{}; - - cudf::table_view t({a, b}); - - auto ret = cudf::explode(t, 0); - - fixed_width_column_wrapper expected_a{}; - fixed_width_column_wrapper expected_b{}; - cudf::table_view expected({expected_a, expected_b}); - - CUDF_TEST_EXPECT_TABLES_EQUAL(ret->view(), expected); - - auto pos_ret = cudf::explode_position(t, 0); - - fixed_width_column_wrapper expected_c{}; - cudf::table_view pos_expected({expected_a, expected_b, expected_c}); - - CUDF_TEST_EXPECT_TABLES_EQUAL(pos_ret->view(), pos_expected); -} - -TEST_F(ExplodeTest, NonList) -{ - fixed_width_column_wrapper a{100, 200, 300}; - fixed_width_column_wrapper b{100, 200, 300}; - - cudf::table_view t({a, b}); - - EXPECT_THROW(cudf::explode(t, 1), cudf::logic_error); - EXPECT_THROW(cudf::explode_position(t, 1), cudf::logic_error); -} - -TEST_F(ExplodeTest, Basics) -{ - /* - a b - [1, 2, 7] 100 - [5, 6] 200 - [0, 3] 300 - */ - - fixed_width_column_wrapper a{100, 200, 300}; - lists_column_wrapper b{lists_column_wrapper{1, 2, 7}, - lists_column_wrapper{5, 6}, - lists_column_wrapper{0, 3}}; - strings_column_wrapper c{"string0", "string1", "string2"}; - - fixed_width_column_wrapper expected_a{100, 100, 100, 200, 200, 300, 300}; - fixed_width_column_wrapper expected_b{1, 2, 7, 5, 6, 0, 3}; - strings_column_wrapper expected_c{ - "string0", "string0", "string0", "string1", "string1", "string2", "string2"}; - - cudf::table_view t({a, b, c}); - cudf::table_view expected({expected_a, expected_b, expected_c}); - - auto ret = cudf::explode(t, 1); - - CUDF_TEST_EXPECT_TABLES_EQUAL(ret->view(), expected); - - fixed_width_column_wrapper expected_pos_col{0, 1, 2, 0, 1, 0, 1}; - cudf::table_view pos_expected({expected_a, expected_pos_col, expected_b, expected_c}); - - auto pos_ret = cudf::explode_position(t, 1); - CUDF_TEST_EXPECT_TABLES_EQUAL(pos_ret->view(), pos_expected); -} - -TEST_F(ExplodeTest, SingleNull) -{ - /* - a b - [1, 2, 7] 100 - [5, 6] 200 - [] 300 - [0, 3] 400 - */ - - auto first_invalid = - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i == 0 ? false : true; }); - - lists_column_wrapper a({lists_column_wrapper{1, 2, 7}, - lists_column_wrapper{5, 6}, - lists_column_wrapper{}, - lists_column_wrapper{0, 3}}, - first_invalid); - fixed_width_column_wrapper b({100, 200, 300, 400}); - - fixed_width_column_wrapper expected_a{5, 6, 0, 3}; - fixed_width_column_wrapper expected_b{200, 200, 400, 400}; - - cudf::table_view t({a, b}); - cudf::table_view expected({expected_a, expected_b}); - - auto ret = cudf::explode(t, 0); - - CUDF_TEST_EXPECT_TABLES_EQUAL(ret->view(), expected); - - fixed_width_column_wrapper expected_pos_col{0, 1, 0, 1}; - cudf::table_view pos_expected({expected_pos_col, expected_a, expected_b}); - - auto pos_ret = cudf::explode_position(t, 0); - CUDF_TEST_EXPECT_TABLES_EQUAL(pos_ret->view(), pos_expected); -} - -TEST_F(ExplodeTest, Nulls) -{ - /* - a b - [1, 2, 7] 100 - [5, 6] 200 - [0, 3] 300 - */ - - auto valids = cudf::detail::make_counting_transform_iterator( - 0, [](auto i) { return i % 2 == 0 ? 
true : false; }); - auto always_valid = - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return true; }); - - lists_column_wrapper a({lists_column_wrapper{1, 2, 7}, - lists_column_wrapper{5, 6}, - lists_column_wrapper{0, 3}}, - valids); - fixed_width_column_wrapper b({100, 200, 300}, valids); - - fixed_width_column_wrapper expected_a({1, 2, 7, 0, 3}); - fixed_width_column_wrapper expected_b({100, 100, 100, 300, 300}, always_valid); - - cudf::table_view t({a, b}); - cudf::table_view expected({expected_a, expected_b}); - - auto ret = cudf::explode(t, 0); - - CUDF_TEST_EXPECT_TABLES_EQUAL(ret->view(), expected); - - fixed_width_column_wrapper expected_pos_col{0, 1, 2, 0, 1}; - cudf::table_view pos_expected({expected_pos_col, expected_a, expected_b}); - - auto pos_ret = cudf::explode_position(t, 0); - CUDF_TEST_EXPECT_TABLES_EQUAL(pos_ret->view(), pos_expected); -} - -TEST_F(ExplodeTest, NullsInList) -{ - /* - a b - [1, 2, 7] 100 - [5, 6, 0, 9] 200 - [] 300 - [0, 3, 8] 400 - */ - - auto valids = cudf::detail::make_counting_transform_iterator( - 0, [](auto i) { return i % 2 == 0 ? true : false; }); - - lists_column_wrapper a{lists_column_wrapper({1, 2, 7}, valids), - lists_column_wrapper({5, 6, 0, 9}, valids), - lists_column_wrapper{}, - lists_column_wrapper({0, 3, 8}, valids)}; - fixed_width_column_wrapper b{100, 200, 300, 400}; - - fixed_width_column_wrapper expected_a({1, 2, 7, 5, 6, 0, 9, 0, 3, 8}, - {1, 0, 1, 1, 0, 1, 0, 1, 0, 1}); - fixed_width_column_wrapper expected_b{100, 100, 100, 200, 200, 200, 200, 400, 400, 400}; - - cudf::table_view t({a, b}); - cudf::table_view expected({expected_a, expected_b}); - - auto ret = cudf::explode(t, 0); - - CUDF_TEST_EXPECT_TABLES_EQUAL(ret->view(), expected); - - fixed_width_column_wrapper expected_pos_col{0, 1, 2, 0, 1, 2, 3, 0, 1, 2}; - cudf::table_view pos_expected({expected_pos_col, expected_a, expected_b}); - - auto pos_ret = cudf::explode_position(t, 0); - CUDF_TEST_EXPECT_TABLES_EQUAL(pos_ret->view(), pos_expected); -} - -TEST_F(ExplodeTest, Nested) -{ - /* - a b - [[1, 2], [7, 6, 5]] 100 - [[5, 6]] 200 - [[0, 3],[],[5],[2, 1]] 300 - */ - - lists_column_wrapper a{ - lists_column_wrapper{lists_column_wrapper{1, 2}, - lists_column_wrapper{7, 6, 5}}, - lists_column_wrapper{lists_column_wrapper{5, 6}}, - lists_column_wrapper{lists_column_wrapper{0, 3}, - lists_column_wrapper{}, - lists_column_wrapper{5}, - lists_column_wrapper{2, 1}}}; - fixed_width_column_wrapper b{100, 200, 300}; - - lists_column_wrapper expected_a{lists_column_wrapper{1, 2}, - lists_column_wrapper{7, 6, 5}, - lists_column_wrapper{5, 6}, - lists_column_wrapper{0, 3}, - lists_column_wrapper{}, - lists_column_wrapper{5}, - lists_column_wrapper{2, 1}}; - fixed_width_column_wrapper expected_b{100, 100, 200, 300, 300, 300, 300}; - - cudf::table_view t({a, b}); - cudf::table_view expected({expected_a, expected_b}); - - auto ret = cudf::explode(t, 0); - - CUDF_TEST_EXPECT_TABLES_EQUAL(ret->view(), expected); - - fixed_width_column_wrapper expected_pos_col{0, 1, 0, 0, 1, 2, 3}; - cudf::table_view pos_expected({expected_pos_col, expected_a, expected_b}); - - auto pos_ret = cudf::explode_position(t, 0); - CUDF_TEST_EXPECT_TABLES_EQUAL(pos_ret->view(), pos_expected); -} - -TEST_F(ExplodeTest, NestedNulls) -{ - /* - a b - [[1, 2], [7, 6, 5]] 100 - [[5, 6]] 200 - [[0, 3],[5],[2, 1]] 300 - */ - - auto valids = cudf::detail::make_counting_transform_iterator( - 0, [](auto i) { return i % 2 == 0 ? 
true : false; }); - auto always_valid = - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return true; }); - - lists_column_wrapper a( - {lists_column_wrapper{lists_column_wrapper{1, 2}, - lists_column_wrapper{7, 6, 5}}, - lists_column_wrapper{lists_column_wrapper{5, 6}}, - lists_column_wrapper{lists_column_wrapper{0, 3}, - lists_column_wrapper{5}, - lists_column_wrapper{2, 1}}}, - valids); - fixed_width_column_wrapper b({100, 200, 300}, valids); - - lists_column_wrapper expected_a{lists_column_wrapper{1, 2}, - lists_column_wrapper{7, 6, 5}, - lists_column_wrapper{0, 3}, - lists_column_wrapper{5}, - lists_column_wrapper{2, 1}}; - fixed_width_column_wrapper expected_b({100, 100, 300, 300, 300}, always_valid); - - cudf::table_view t({a, b}); - cudf::table_view expected({expected_a, expected_b}); - - auto ret = cudf::explode(t, 0); - - CUDF_TEST_EXPECT_TABLES_EQUAL(ret->view(), expected); - - fixed_width_column_wrapper expected_pos_col{0, 1, 0, 1, 2}; - cudf::table_view pos_expected({expected_pos_col, expected_a, expected_b}); - - auto pos_ret = cudf::explode_position(t, 0); - CUDF_TEST_EXPECT_TABLES_EQUAL(pos_ret->view(), pos_expected); -} - -TEST_F(ExplodeTest, NullsInNested) -{ - /* - a b - [[1, 2], [7, 6, 5]] 100 - [[5, 6]] 200 - [[0, 3],[5],[2, 1]] 300 - */ - - auto valids = cudf::detail::make_counting_transform_iterator( - 0, [](auto i) { return i % 2 == 0 ? true : false; }); - - lists_column_wrapper a( - {lists_column_wrapper{lists_column_wrapper({1, 2}, valids), - lists_column_wrapper{7, 6, 5}}, - lists_column_wrapper{lists_column_wrapper{5, 6}}, - lists_column_wrapper{lists_column_wrapper{0, 3}, - lists_column_wrapper{5}, - lists_column_wrapper({2, 1}, valids)}}); - fixed_width_column_wrapper b({100, 200, 300}); - - lists_column_wrapper expected_a{lists_column_wrapper({1, 2}, valids), - lists_column_wrapper{7, 6, 5}, - lists_column_wrapper{5, 6}, - lists_column_wrapper{0, 3}, - lists_column_wrapper{5}, - lists_column_wrapper({2, 1}, valids)}; - fixed_width_column_wrapper expected_b{100, 100, 200, 300, 300, 300}; - - cudf::table_view t({a, b}); - cudf::table_view expected({expected_a, expected_b}); - - auto ret = cudf::explode(t, 0); - - CUDF_TEST_EXPECT_TABLES_EQUAL(ret->view(), expected); - - fixed_width_column_wrapper expected_pos_col{0, 1, 0, 0, 1, 2}; - cudf::table_view pos_expected({expected_pos_col, expected_a, expected_b}); - - auto pos_ret = cudf::explode_position(t, 0); - CUDF_TEST_EXPECT_TABLES_EQUAL(pos_ret->view(), pos_expected); -} - -TEST_F(ExplodeTest, NullsInNestedDoubleExplode) -{ - /* - a b - [[1, 2], [], [7, 6, 5]] 100 - [[5, 6]] 200 - [[0, 3],[5],[2, 1]] 300 - */ - - auto valids = cudf::detail::make_counting_transform_iterator( - 0, [](auto i) { return i % 2 == 0 ? 
true : false; }); - - lists_column_wrapper a{ - lists_column_wrapper{lists_column_wrapper({1, 2}, valids), - lists_column_wrapper{}, - lists_column_wrapper{7, 6, 5}}, - lists_column_wrapper{lists_column_wrapper{5, 6}}, - lists_column_wrapper{lists_column_wrapper{0, 3}, - lists_column_wrapper{5}, - lists_column_wrapper({2, 1}, valids)}}; - fixed_width_column_wrapper b{100, 200, 300}; - - fixed_width_column_wrapper expected_a({1, 2, 7, 6, 5, 5, 6, 0, 3, 5, 2, 1}, - {1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0}); - fixed_width_column_wrapper expected_b{ - 100, 100, 100, 100, 100, 200, 200, 300, 300, 300, 300, 300}; - - cudf::table_view t({a, b}); - cudf::table_view expected({expected_a, expected_b}); - - auto first_explode_ret = cudf::explode(t, 0); - auto ret = cudf::explode(first_explode_ret->view(), 0); - - CUDF_TEST_EXPECT_TABLES_EQUAL(ret->view(), expected); - - fixed_width_column_wrapper expected_pos_col{0, 1, 0, 1, 2, 0, 1, 0, 1, 0, 0, 1}; - cudf::table_view pos_expected({expected_pos_col, expected_a, expected_b}); - - auto pos_ret = cudf::explode_position(first_explode_ret->view(), 0); - CUDF_TEST_EXPECT_TABLES_EQUAL(pos_ret->view(), pos_expected); -} - -TEST_F(ExplodeTest, NestedStructs) -{ - /* - a b - [[1, 2], [7, 6, 5]] {100, "100"} - [[5, 6]] {200, "200"} - [[0, 3],[5],[2, 1]] {300, "300"} - */ - - auto valids = cudf::detail::make_counting_transform_iterator( - 0, [](auto i) { return i % 2 == 0 ? true : false; }); - - lists_column_wrapper a( - {lists_column_wrapper{lists_column_wrapper({1, 2}, valids), - lists_column_wrapper{7, 6, 5}}, - lists_column_wrapper{lists_column_wrapper{5, 6}}, - lists_column_wrapper{lists_column_wrapper{0, 3}, - lists_column_wrapper{5}, - lists_column_wrapper({2, 1}, valids)}}); - fixed_width_column_wrapper b1({100, 200, 300}); - strings_column_wrapper b2{"100", "200", "300"}; - structs_column_wrapper b({b1, b2}); - - lists_column_wrapper expected_a{lists_column_wrapper({1, 2}, valids), - lists_column_wrapper{7, 6, 5}, - lists_column_wrapper{5, 6}, - lists_column_wrapper{0, 3}, - lists_column_wrapper{5}, - lists_column_wrapper({2, 1}, valids)}; - fixed_width_column_wrapper expected_b1{100, 100, 200, 300, 300, 300}; - strings_column_wrapper expected_b2{"100", "100", "200", "300", "300", "300"}; - structs_column_wrapper expected_b({expected_b1, expected_b2}); - - cudf::table_view t({a, b}); - cudf::table_view expected({expected_a, expected_b}); - - auto ret = cudf::explode(t, 0); - - CUDF_TEST_EXPECT_TABLES_EQUAL(ret->view(), expected); - - fixed_width_column_wrapper expected_pos_col{0, 1, 0, 0, 1, 2}; - cudf::table_view pos_expected({expected_pos_col, expected_a, expected_b}); - - auto pos_ret = cudf::explode_position(t, 0); - CUDF_TEST_EXPECT_TABLES_EQUAL(pos_ret->view(), pos_expected); -} - -TYPED_TEST(ExplodeTypedTest, ListOfStructs) -{ - /* - a b - [{70, "70"}, {75, "75"}] 100 - [{50, "50"}, {55, "55"}] 200 - [{35, "35"}, {45, "45"}] 300 - [{25, "25"}, {30, "30"}] 400 - [{15, "15"}, {20, "20"}] 500 -*/ - - auto numeric_col = - fixed_width_column_wrapper{{70, 75, 50, 55, 35, 45, 25, 30, 15, 20}}; - strings_column_wrapper string_col{"70", "75", "50", "55", "35", "45", "25", "30", "15", "20"}; - auto struct_col = structs_column_wrapper{{numeric_col, string_col}}.release(); - auto a = cudf::make_lists_column(5, - fixed_width_column_wrapper{0, 2, 4, 6, 8, 10}.release(), - std::move(struct_col), - cudf::UNKNOWN_NULL_COUNT, - {}); - - fixed_width_column_wrapper b{100, 200, 300, 400, 500}; - - cudf::table_view t({a->view(), b}); - auto ret = cudf::explode(t, 0); - - 
auto expected_numeric_col = - fixed_width_column_wrapper{{70, 75, 50, 55, 35, 45, 25, 30, 15, 20}}; - strings_column_wrapper expected_string_col{ - "70", "75", "50", "55", "35", "45", "25", "30", "15", "20"}; - - auto expected_a = structs_column_wrapper{{expected_numeric_col, expected_string_col}}.release(); - fixed_width_column_wrapper expected_b{100, 100, 200, 200, 300, 300, 400, 400, 500, 500}; - - cudf::table_view expected({expected_a->view(), expected_b}); - - CUDF_TEST_EXPECT_TABLES_EQUAL(ret->view(), expected); - - fixed_width_column_wrapper expected_pos_col{0, 1, 0, 1, 0, 1, 0, 1, 0, 1}; - cudf::table_view pos_expected({expected_pos_col, expected_a->view(), expected_b}); - - auto pos_ret = cudf::explode_position(t, 0); - CUDF_TEST_EXPECT_TABLES_EQUAL(pos_ret->view(), pos_expected); -} - -TEST_F(ExplodeTest, SlicedList) -{ - /* - a b - [[1, 2],[7, 6, 5]] 100 - [[5, 6]] 200 - [[0, 3],[5],[2, 1]] 300 - [[8, 3],[],[4, 3, 1, 2]] 400 - [[2, 3, 4],[9, 8]] 500 - - slicing the top 2 rows and the bottom row off - */ - - auto valids = cudf::detail::make_counting_transform_iterator( - 0, [](auto i) { return i % 2 == 0 ? true : false; }); - - lists_column_wrapper a( - {lists_column_wrapper{lists_column_wrapper({1, 2}, valids), - lists_column_wrapper{7, 6, 5}}, - lists_column_wrapper{lists_column_wrapper{5, 6}}, - lists_column_wrapper{lists_column_wrapper{0, 3}, - lists_column_wrapper{5}, - lists_column_wrapper({2, 1}, valids)}, - lists_column_wrapper{lists_column_wrapper{8, 3}, - lists_column_wrapper{}, - lists_column_wrapper({4, 3, 1, 2}, valids)}, - lists_column_wrapper{lists_column_wrapper{2, 3, 4}, - lists_column_wrapper{9, 8}}}); - fixed_width_column_wrapper b({100, 200, 300, 400, 500}); - - lists_column_wrapper expected_a{lists_column_wrapper{0, 3}, - lists_column_wrapper{5}, - lists_column_wrapper({2, 1}, valids), - lists_column_wrapper{8, 3}, - lists_column_wrapper{}, - lists_column_wrapper({4, 3, 1, 2}, valids)}; - fixed_width_column_wrapper expected_b{300, 300, 300, 400, 400, 400}; - - cudf::table_view t({a, b}); - auto sliced_t = cudf::slice(t, {2, 4}); - cudf::table_view expected({expected_a, expected_b}); - - auto ret = cudf::explode(sliced_t[0], 0); - - CUDF_TEST_EXPECT_TABLES_EQUAL(ret->view(), expected); - - fixed_width_column_wrapper expected_pos_col{0, 1, 2, 0, 1, 2}; - cudf::table_view pos_expected({expected_pos_col, expected_a, expected_b}); - - auto pos_ret = cudf::explode_position(sliced_t[0], 0); - CUDF_TEST_EXPECT_TABLES_EQUAL(pos_ret->view(), pos_expected); -} diff --git a/cpp/tests/strings/chars_types_tests.cpp b/cpp/tests/strings/chars_types_tests.cpp index 803a9b01b07..702329edaba 100644 --- a/cpp/tests/strings/chars_types_tests.cpp +++ b/cpp/tests/strings/chars_types_tests.cpp @@ -14,7 +14,6 @@ * limitations under the License. 
*/ -#include #include #include #include @@ -228,54 +227,6 @@ TEST_F(StringsCharsTest, Numerics) CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected); } -TEST_F(StringsCharsTest, Integers) -{ - cudf::test::strings_column_wrapper strings1( - {"+175", "-34", "9.8", "17+2", "+-14", "1234567890", "67de", "", "1e10", "-", "++", ""}); - auto results = cudf::strings::is_integer(cudf::strings_column_view(strings1)); - cudf::test::fixed_width_column_wrapper expected1({1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0}); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected1); - EXPECT_FALSE(cudf::strings::all_integer(cudf::strings_column_view(strings1))); - - cudf::test::strings_column_wrapper strings2( - {"0", "+0", "-0", "1234567890", "-27341132", "+012", "023", "-045"}); - results = cudf::strings::is_integer(cudf::strings_column_view(strings2)); - cudf::test::fixed_width_column_wrapper expected2({1, 1, 1, 1, 1, 1, 1, 1}); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected2); - EXPECT_TRUE(cudf::strings::all_integer(cudf::strings_column_view(strings2))); -} - -TEST_F(StringsCharsTest, Floats) -{ - cudf::test::strings_column_wrapper strings1({"+175", - "-9.8", - "7+2", - "+-4", - "6.7e17", - "-1.2e-5", - "e", - ".e", - "1.e+-2", - "00.00", - "1.0e+1.0", - "1.2.3", - "+", - "--", - ""}); - auto results = cudf::strings::is_float(cudf::strings_column_view(strings1)); - cudf::test::fixed_width_column_wrapper expected1( - {1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0}); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected1); - EXPECT_FALSE(cudf::strings::all_float(cudf::strings_column_view(strings1))); - - cudf::test::strings_column_wrapper strings2( - {"+175", "-34", "9.8", "1234567890", "6.7e17", "-917.2e5"}); - results = cudf::strings::is_float(cudf::strings_column_view(strings2)); - cudf::test::fixed_width_column_wrapper expected2({1, 1, 1, 1, 1, 1}); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected2); - EXPECT_TRUE(cudf::strings::all_float(cudf::strings_column_view(strings2))); -} - TEST_F(StringsCharsTest, EmptyStrings) { cudf::test::strings_column_wrapper strings({"", "", ""}); @@ -284,12 +235,6 @@ TEST_F(StringsCharsTest, EmptyStrings) auto results = cudf::strings::all_characters_of_type( strings_view, cudf::strings::string_character_types::ALPHANUM); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected); - results = cudf::strings::is_integer(strings_view); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected); - EXPECT_FALSE(cudf::strings::all_integer(strings_view)); - results = cudf::strings::is_float(strings_view); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected); - EXPECT_FALSE(cudf::strings::all_float(strings_view)); } TEST_F(StringsCharsTest, FilterCharTypes) @@ -379,14 +324,6 @@ TEST_F(StringsCharsTest, EmptyStringsColumn) EXPECT_EQ(cudf::type_id::BOOL8, results->view().type().id()); EXPECT_EQ(0, results->view().size()); - results = cudf::strings::is_integer(strings_view); - EXPECT_EQ(cudf::type_id::BOOL8, results->view().type().id()); - EXPECT_EQ(0, results->view().size()); - - results = cudf::strings::is_float(strings_view); - EXPECT_EQ(cudf::type_id::BOOL8, results->view().type().id()); - EXPECT_EQ(0, results->view().size()); - results = cudf::strings::filter_characters_of_type( strings_view, cudf::strings::string_character_types::NUMERIC); EXPECT_EQ(cudf::type_id::STRING, results->view().type().id()); diff --git a/cpp/tests/strings/factories_test.cu b/cpp/tests/strings/factories_test.cu index f904c404251..bd463a7ab0d 100644 --- a/cpp/tests/strings/factories_test.cu +++ 
b/cpp/tests/strings/factories_test.cu @@ -19,12 +19,18 @@ #include #include #include +#include #include #include #include #include #include +#include + +#include +#include + #include #include @@ -198,3 +204,31 @@ TEST_F(StringsFactoriesTest, CreateOffsets) } } } + +namespace { +using string_pair = thrust::pair; +struct string_view_to_pair { + __device__ string_pair operator()(thrust::pair const& p) + { + return (p.second) ? string_pair{p.first.data(), p.first.size_bytes()} : string_pair{nullptr, 0}; + } +}; +} // namespace + +TEST_F(StringsFactoriesTest, StringPairWithNullsAndEmpty) +{ + cudf::test::strings_column_wrapper data( + {"", "this", "is", "", "a", "", "column", "of", "strings", "", ""}, + {0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1}); + + auto d_column = cudf::column_device_view::create(data); + rmm::device_vector pairs(d_column->size()); + thrust::transform(thrust::device, + d_column->pair_begin(), + d_column->pair_end(), + pairs.data(), + string_view_to_pair{}); + + auto result = cudf::make_strings_column(pairs); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(result->view(), data); +} diff --git a/cpp/tests/strings/floats_tests.cpp b/cpp/tests/strings/floats_tests.cpp index b98416d9edd..f7151363d83 100644 --- a/cpp/tests/strings/floats_tests.cpp +++ b/cpp/tests/strings/floats_tests.cpp @@ -27,6 +27,41 @@ struct StringsConvertTest : public cudf::test::BaseFixture { }; +TEST_F(StringsConvertTest, IsFloat) +{ + cudf::test::strings_column_wrapper strings; + auto strings_view = cudf::strings_column_view(strings); + auto results = cudf::strings::is_float(strings_view); + EXPECT_EQ(cudf::type_id::BOOL8, results->view().type().id()); + EXPECT_EQ(0, results->view().size()); + + cudf::test::strings_column_wrapper strings1({"+175", + "-9.8", + "7+2", + "+-4", + "6.7e17", + "-1.2e-5", + "e", + ".e", + "1.e+-2", + "00.00", + "1.0e+1.0", + "1.2.3", + "+", + "--", + ""}); + results = cudf::strings::is_float(cudf::strings_column_view(strings1)); + cudf::test::fixed_width_column_wrapper expected1( + {1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected1); + + cudf::test::strings_column_wrapper strings2( + {"+175", "-34", "9.8", "1234567890", "6.7e17", "-917.2e5"}); + results = cudf::strings::is_float(cudf::strings_column_view(strings2)); + cudf::test::fixed_width_column_wrapper expected2({1, 1, 1, 1, 1, 1}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected2); +} + TEST_F(StringsConvertTest, ToFloats32) { std::vector h_strings{"1234", diff --git a/cpp/tests/strings/integers_tests.cu b/cpp/tests/strings/integers_tests.cu index 9e2b9809b26..d6bf03b3f76 100644 --- a/cpp/tests/strings/integers_tests.cu +++ b/cpp/tests/strings/integers_tests.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -29,6 +29,27 @@ struct StringsConvertTest : public cudf::test::BaseFixture { }; +TEST_F(StringsConvertTest, IsInteger) +{ + cudf::test::strings_column_wrapper strings; + auto strings_view = cudf::strings_column_view(strings); + auto results = cudf::strings::is_integer(strings_view); + EXPECT_EQ(cudf::type_id::BOOL8, results->view().type().id()); + EXPECT_EQ(0, results->view().size()); + + cudf::test::strings_column_wrapper strings1( + {"+175", "-34", "9.8", "17+2", "+-14", "1234567890", "67de", "", "1e10", "-", "++", ""}); + results = cudf::strings::is_integer(cudf::strings_column_view(strings1)); + cudf::test::fixed_width_column_wrapper expected1({1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected1); + + cudf::test::strings_column_wrapper strings2( + {"0", "+0", "-0", "1234567890", "-27341132", "+012", "023", "-045"}); + results = cudf::strings::is_integer(cudf::strings_column_view(strings2)); + cudf::test::fixed_width_column_wrapper expected2({1, 1, 1, 1, 1, 1, 1, 1}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected2); +} + TEST_F(StringsConvertTest, ToInteger) { std::vector h_strings{ diff --git a/java/src/main/java/ai/rapids/cudf/BinaryOperable.java b/java/src/main/java/ai/rapids/cudf/BinaryOperable.java index e5e849a74c4..68213c21956 100644 --- a/java/src/main/java/ai/rapids/cudf/BinaryOperable.java +++ b/java/src/main/java/ai/rapids/cudf/BinaryOperable.java @@ -38,7 +38,7 @@ public interface BinaryOperable { * with scale=0 as scale is required. Dtype is discarded for binary operations for decimal * types in cudf as a new DType is created for output type with the new scale. */ - static DType implicitConversion(BinaryOperable lhs, BinaryOperable rhs) { + static DType implicitConversion(BinaryOp op, BinaryOperable lhs, BinaryOperable rhs) { DType a = lhs.getType(); DType b = rhs.getType(); if (a.equals(DType.FLOAT64) || b.equals(DType.FLOAT64)) { @@ -86,13 +86,15 @@ static DType implicitConversion(BinaryOperable lhs, BinaryOperable rhs) { int scale = 0; if (a.typeId == DType.DTypeEnum.DECIMAL32) { if (b.typeId == DType.DTypeEnum.DECIMAL32) { - return DType.create(DType.DTypeEnum.DECIMAL32, scale); + return DType.create(DType.DTypeEnum.DECIMAL32, + ColumnView.getFixedPointOutputScale(op, lhs.getType(), rhs.getType())); } else { throw new IllegalArgumentException("Both columns must be of the same fixed_point type"); } } else if (a.typeId == DType.DTypeEnum.DECIMAL64) { if (b.typeId == DType.DTypeEnum.DECIMAL64) { - return DType.create(DType.DTypeEnum.DECIMAL64, scale); + return DType.create(DType.DTypeEnum.DECIMAL64, + ColumnView.getFixedPointOutputScale(op, lhs.getType(), rhs.getType())); } else { throw new IllegalArgumentException("Both columns must be of the same fixed_point type"); } @@ -128,7 +130,7 @@ default ColumnVector add(BinaryOperable rhs, DType outType) { * Add + operator. this + rhs */ default ColumnVector add(BinaryOperable rhs) { - return add(rhs, implicitConversion(this, rhs)); + return add(rhs, implicitConversion(BinaryOp.ADD, this, rhs)); } /** @@ -144,7 +146,7 @@ default ColumnVector sub(BinaryOperable rhs, DType outType) { * Subtract one vector from another. this - rhs */ default ColumnVector sub(BinaryOperable rhs) { - return sub(rhs, implicitConversion(this, rhs)); + return sub(rhs, implicitConversion(BinaryOp.SUB, this, rhs)); } /** @@ -160,7 +162,7 @@ default ColumnVector mul(BinaryOperable rhs, DType outType) { * Multiply two vectors together. 
this * rhs */ default ColumnVector mul(BinaryOperable rhs) { - return mul(rhs, implicitConversion(this, rhs)); + return mul(rhs, implicitConversion(BinaryOp.MUL, this, rhs)); } /** @@ -176,7 +178,7 @@ default ColumnVector div(BinaryOperable rhs, DType outType) { * Divide one vector by another. this / rhs */ default ColumnVector div(BinaryOperable rhs) { - return div(rhs, implicitConversion(this, rhs)); + return div(rhs, implicitConversion(BinaryOp.DIV, this, rhs)); } /** @@ -192,7 +194,7 @@ default ColumnVector trueDiv(BinaryOperable rhs, DType outType) { * (double)this / (double)rhs */ default ColumnVector trueDiv(BinaryOperable rhs) { - return trueDiv(rhs, implicitConversion(this, rhs)); + return trueDiv(rhs, implicitConversion(BinaryOp.TRUE_DIV, this, rhs)); } /** @@ -208,7 +210,7 @@ default ColumnVector floorDiv(BinaryOperable rhs, DType outType) { * Math.floor(this/rhs) */ default ColumnVector floorDiv(BinaryOperable rhs) { - return floorDiv(rhs, implicitConversion(this, rhs)); + return floorDiv(rhs, implicitConversion(BinaryOp.FLOOR_DIV, this, rhs)); } /** @@ -224,7 +226,7 @@ default ColumnVector mod(BinaryOperable rhs, DType outType) { * this % rhs */ default ColumnVector mod(BinaryOperable rhs) { - return mod(rhs, implicitConversion(this, rhs)); + return mod(rhs, implicitConversion(BinaryOp.MOD, this, rhs)); } /** @@ -240,7 +242,7 @@ default ColumnVector pow(BinaryOperable rhs, DType outType) { * Math.pow(this, rhs) */ default ColumnVector pow(BinaryOperable rhs) { - return pow(rhs, implicitConversion(this, rhs)); + return pow(rhs, implicitConversion(BinaryOp.POW, this, rhs)); } /** @@ -338,7 +340,7 @@ default ColumnVector bitAnd(BinaryOperable rhs, DType outType) { * Bit wise and (&). this & rhs */ default ColumnVector bitAnd(BinaryOperable rhs) { - return bitAnd(rhs, implicitConversion(this, rhs)); + return bitAnd(rhs, implicitConversion(BinaryOp.BITWISE_AND, this, rhs)); } /** @@ -352,7 +354,7 @@ default ColumnVector bitOr(BinaryOperable rhs, DType outType) { * Bit wise or (|). this | rhs */ default ColumnVector bitOr(BinaryOperable rhs) { - return bitOr(rhs, implicitConversion(this, rhs)); + return bitOr(rhs, implicitConversion(BinaryOp.BITWISE_OR, this, rhs)); } /** @@ -366,7 +368,7 @@ default ColumnVector bitXor(BinaryOperable rhs, DType outType) { * Bit wise xor (^). this ^ rhs */ default ColumnVector bitXor(BinaryOperable rhs) { - return bitXor(rhs, implicitConversion(this, rhs)); + return bitXor(rhs, implicitConversion(BinaryOp.BITWISE_XOR, this, rhs)); } /** @@ -380,7 +382,7 @@ default ColumnVector and(BinaryOperable rhs, DType outType) { * Logical and (&&). this && rhs */ default ColumnVector and(BinaryOperable rhs) { - return and(rhs, implicitConversion(this, rhs)); + return and(rhs, implicitConversion(BinaryOp.LOGICAL_AND, this, rhs)); } /** @@ -394,7 +396,7 @@ default ColumnVector or(BinaryOperable rhs, DType outType) { * Logical or (||). 
this || rhs */ default ColumnVector or(BinaryOperable rhs) { - return or(rhs, implicitConversion(this, rhs)); + return or(rhs, implicitConversion(BinaryOp.LOGICAL_OR, this, rhs)); } /** @@ -421,7 +423,7 @@ default ColumnVector shiftLeft(BinaryOperable shiftBy, DType outType) { * with this[i] << shiftBy */ default ColumnVector shiftLeft(BinaryOperable shiftBy) { - return shiftLeft(shiftBy, implicitConversion(this, shiftBy)); + return shiftLeft(shiftBy, implicitConversion(BinaryOp.SHIFT_LEFT, this, shiftBy)); } /** @@ -447,7 +449,7 @@ default ColumnVector shiftRight(BinaryOperable shiftBy, DType outType) { * with this[i] >> shiftBy */ default ColumnVector shiftRight(BinaryOperable shiftBy) { - return shiftRight(shiftBy, implicitConversion(this, shiftBy)); + return shiftRight(shiftBy, implicitConversion(BinaryOp.SHIFT_RIGHT, this, shiftBy)); } /** @@ -475,7 +477,8 @@ default ColumnVector shiftRightUnsigned(BinaryOperable shiftBy, DType outType) { * with this[i] >>> shiftBy */ default ColumnVector shiftRightUnsigned(BinaryOperable shiftBy) { - return shiftRightUnsigned(shiftBy, implicitConversion(this, shiftBy)); + return shiftRightUnsigned(shiftBy, implicitConversion(BinaryOp.SHIFT_RIGHT_UNSIGNED, this, + shiftBy)); } /** @@ -505,7 +508,7 @@ default ColumnVector arctan2(BinaryOperable xCoordinate, DType outType) { * in radians, between the positive x axis and the ray to the point (x, y) ≠ (0, 0). */ default ColumnVector arctan2(BinaryOperable xCoordinate) { - return arctan2(xCoordinate, implicitConversion(this, xCoordinate)); + return arctan2(xCoordinate, implicitConversion(BinaryOp.ATAN2, this, xCoordinate)); } /** @@ -529,7 +532,7 @@ default ColumnVector pmod(BinaryOperable rhs, DType outputType) { * */ default ColumnVector pmod(BinaryOperable rhs) { - return pmod(rhs, implicitConversion(this, rhs)); + return pmod(rhs, implicitConversion(BinaryOp.PMOD, this, rhs)); } /** @@ -557,7 +560,7 @@ default ColumnVector maxNullAware(BinaryOperable rhs, DType outType) { * Returns the max non null value. */ default ColumnVector maxNullAware(BinaryOperable rhs) { - return maxNullAware(rhs, implicitConversion(this, rhs)); + return maxNullAware(rhs, implicitConversion(BinaryOp.NULL_MAX, this, rhs)); } /** @@ -571,6 +574,7 @@ default ColumnVector minNullAware(BinaryOperable rhs, DType outType) { * Returns the min non null value. 
*/ default ColumnVector minNullAware(BinaryOperable rhs) { - return minNullAware(rhs, implicitConversion(this, rhs)); + return minNullAware(rhs, implicitConversion(BinaryOp.NULL_MIN, this, rhs)); } + } diff --git a/java/src/main/java/ai/rapids/cudf/ColumnView.java b/java/src/main/java/ai/rapids/cudf/ColumnView.java index f36896a3c96..2f3f2bf80cf 100644 --- a/java/src/main/java/ai/rapids/cudf/ColumnView.java +++ b/java/src/main/java/ai/rapids/cudf/ColumnView.java @@ -129,6 +129,13 @@ public final long getNativeView() { return viewHandle; } + static int getFixedPointOutputScale(BinaryOp op, DType lhsType, DType rhsType) { + assert (lhsType.isDecimalType() && rhsType.isDecimalType()); + return fixedPointOutputScale(op.nativeId, lhsType.getScale(), rhsType.getScale()); + } + + private static native int fixedPointOutputScale(int op, int lhsScale, int rhsScale); + public final DType getType() { return type; } diff --git a/java/src/main/java/ai/rapids/cudf/HashType.java b/java/src/main/java/ai/rapids/cudf/HashType.java index b521bc5c42c..eb31edd8222 100644 --- a/java/src/main/java/ai/rapids/cudf/HashType.java +++ b/java/src/main/java/ai/rapids/cudf/HashType.java @@ -1,6 +1,6 @@ /* * - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -22,8 +22,8 @@ * Hash algorithm identifiers, mirroring native enum cudf::hash_id */ public enum HashType { - // TODO IDENTITY(0), - // TODO MURMUR3(1), + IDENTITY(0), + MURMUR3(1), HASH_MD5(2), HASH_SERIAL_MURMUR3(3), HASH_SPARK_MURMUR3(4); diff --git a/java/src/main/java/ai/rapids/cudf/JCudfSerialization.java b/java/src/main/java/ai/rapids/cudf/JCudfSerialization.java index bf49fb59d52..6c52b8fe798 100644 --- a/java/src/main/java/ai/rapids/cudf/JCudfSerialization.java +++ b/java/src/main/java/ai/rapids/cudf/JCudfSerialization.java @@ -1,6 +1,6 @@ /* * - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -353,6 +353,50 @@ static SerializedColumnHeader readFrom(DataInputStream din, long rowCount) throw } } + /** Class to hold the header and buffer pair result from host-side concatenation */ + public static final class HostConcatResult implements AutoCloseable { + private final SerializedTableHeader tableHeader; + private final HostMemoryBuffer hostBuffer; + + public HostConcatResult(SerializedTableHeader tableHeader, HostMemoryBuffer tableBuffer) { + this.tableHeader = tableHeader; + this.hostBuffer = tableBuffer; + } + + public SerializedTableHeader getTableHeader() { + return tableHeader; + } + + public HostMemoryBuffer getHostBuffer() { + return hostBuffer; + } + + /** Build a contiguous table in device memory from this host-concatenated result */ + public ContiguousTable toContiguousTable() { + DeviceMemoryBuffer devBuffer = DeviceMemoryBuffer.allocate(hostBuffer.length); + try { + if (hostBuffer.length > 0) { + devBuffer.copyFromHostBuffer(hostBuffer); + } + Table table = sliceUpColumnVectors(tableHeader, devBuffer, hostBuffer); + try { + return new ContiguousTable(table, devBuffer); + } catch (Exception e) { + table.close(); + throw e; + } + } catch (Exception e) { + devBuffer.close(); + throw e; + } + } + + @Override + public void close() { + hostBuffer.close(); + } + } + /** * Visible for testing */ @@ -1681,15 +1725,32 @@ public static Table readAndConcat(SerializedTableHeader[] headers, return ct.getTable(); } + /** + * Concatenate multiple tables in host memory into a contiguous table in device memory. + * @param headers table headers corresponding to the host table buffers + * @param dataBuffers host table buffer for each input table to be concatenated + * @return contiguous table in device memory + */ public static ContiguousTable concatToContiguousTable(SerializedTableHeader[] headers, HostMemoryBuffer[] dataBuffers) throws IOException { + try (HostConcatResult concatResult = concatToHostBuffer(headers, dataBuffers)) { + return concatResult.toContiguousTable(); + } + } + + /** + * Concatenate multiple tables in host memory into a single host table buffer. 
+ * @param headers table headers corresponding to the host table buffers + * @param dataBuffers host table buffer for each input table to be concatenated + * @return host table header and buffer + */ + public static HostConcatResult concatToHostBuffer(SerializedTableHeader[] headers, + HostMemoryBuffer[] dataBuffers) throws IOException { ColumnBufferProvider[][] providersPerColumn = providersFrom(headers, dataBuffers); - DeviceMemoryBuffer devBuffer = null; - Table table = null; try { SerializedTableHeader combined = calcConcatHeader(providersPerColumn); - - try (HostMemoryBuffer hostBuffer = HostMemoryBuffer.allocate(combined.dataLen)) { + HostMemoryBuffer hostBuffer = HostMemoryBuffer.allocate(combined.dataLen); + try { try (NvtxRange range = new NvtxRange("Concat Host Side", NvtxColor.GREEN)) { DataWriter writer = writerFrom(hostBuffer); int numColumns = combined.getNumColumns(); @@ -1697,27 +1758,14 @@ public static ContiguousTable concatToContiguousTable(SerializedTableHeader[] he writeConcat(writer, combined.getColumnHeader(columnIdx), providersPerColumn[columnIdx]); } } - - devBuffer = DeviceMemoryBuffer.allocate(hostBuffer.length); - if (hostBuffer.length > 0) { - try (NvtxRange range = new NvtxRange("Copy Data To Device", NvtxColor.WHITE)) { - devBuffer.copyFromHostBuffer(hostBuffer); - } - } - table = sliceUpColumnVectors(combined, devBuffer, hostBuffer); - ContiguousTable result = new ContiguousTable(table, devBuffer); - table = null; - devBuffer = null; - return result; + } catch (Exception e) { + hostBuffer.close(); + throw e; } + + return new HostConcatResult(combined, hostBuffer); } finally { closeAll(providersPerColumn); - if (table != null) { - table.close(); - } - if (devBuffer != null) { - devBuffer.close(); - } } } diff --git a/java/src/main/java/ai/rapids/cudf/Table.java b/java/src/main/java/ai/rapids/cudf/Table.java index fcc23777d69..0dc529d423f 100644 --- a/java/src/main/java/ai/rapids/cudf/Table.java +++ b/java/src/main/java/ai/rapids/cudf/Table.java @@ -183,8 +183,12 @@ public long getDeviceMemorySize() { private static native ContiguousTable[] contiguousSplit(long inputTable, int[] indices); + private static native long[] partition(long inputTable, long partitionView, + int numberOfPartitions, int[] outputOffsets); + private static native long[] hashPartition(long inputTable, int[] columnsToHash, + int hashTypeId, int numberOfPartitions, int[] outputOffsets) throws CudfException; @@ -515,6 +519,10 @@ private static native long[] repeatColumnCount(long tableHandle, private static native long[] explodePosition(long tableHandle, int index); + private static native long[] explodeOuter(long tableHandle, int index); + + private static native long[] explodeOuterPosition(long tableHandle, int index); + private static native long createCudfTableView(long[] nativeColumnViewHandles); private static native long[] columnViewsFromPacked(ByteBuffer metadata, long dataAddress); @@ -1252,6 +1260,24 @@ public Table repeat(ColumnVector counts, boolean checkCount) { return new Table(repeatColumnCount(this.nativeHandle, counts.getNativeView(), checkCount)); } + /** + * Partition this table using the mapping in partitionMap. partitionMap must be an integer + * column. The number of rows in partitionMap must be the same as this table. Each row + * in the map will indicate which partition the rows in the table belong to. + * @param partitionMap the partitions for each row. 
+ * @param numberOfPartitions number of partitions + * @return {@link PartitionedTable} Table that exposes a limited functionality of the + * {@link Table} class + */ + public PartitionedTable partition(ColumnView partitionMap, int numberOfPartitions) { + int[] partitionOffsets = new int[numberOfPartitions]; + return new PartitionedTable(new Table(partition( + getNativeView(), + partitionMap.getNativeView(), + partitionOffsets.length, + partitionOffsets)), partitionOffsets); + } + /** * Find smallest indices in a sorted table where values should be inserted to maintain order. *
@@ -1724,7 +1750,7 @@ public ContiguousTable[] contiguousSplit(int... indices) {
    * Example:
    * input:  [[5,10,15], 100],
    *         [[20,25],   200],
-   *         [[30],      300],
+   *         [[30],      300]
    * index: 0
    * output: [5,         100],
    *         [10,        100],
@@ -1736,12 +1762,12 @@ public ContiguousTable[] contiguousSplit(int... indices) {
    *
    * Nulls propagate in different ways depending on what is null.
    * <pre>
-   *     [[5,null,15], 100],
-   *     [null,        200]
-   * returns:
-   *     [5,           100],
-   *     [null,        100],
-   *     [15,          100]
+   * input:  [[5,null,15], 100],
+   *         [null,        200]
+   * index: 0
+   * output: [5,           100],
+   *         [null,        100],
+   *         [15,          100]
    * </pre>
    * Note that null lists are completely removed from the output
    * and nulls inside lists are pulled out and remain.
@@ -1762,27 +1788,26 @@ public Table explode(int index) {
    * in the output. The corresponding rows for other columns in the input are duplicated. A position
    * column is added that has the index inside the original list for each row. Example:
    * <pre>
-   * [[5,10,15], 100],
-   * [[20,25],   200],
-   * [[30],      300],
-   * returns
-   * [0,   5,    100],
-   * [1,   10,   100],
-   * [2,   15,    100],
-   * [0,   20,    200],
-   * [1,   25,    200],
-   * [0,   30,    300],
+   * input:  [[5,10,15], 100],
+   *         [[20,25],   200],
+   *         [[30],      300]
+   * index: 0
+   * output: [0,   5,    100],
+   *         [1,   10,   100],
+   *         [2,   15,   100],
+   *         [0,   20,   200],
+   *         [1,   25,   200],
+   *         [0,   30,   300]
    * </pre>
    *
    * Nulls and empty lists propagate in different ways depending on what is null or empty.
    * <pre>
-   * [[5,null,15], 100],
-   * [null,        200],
-   * [[],          300],
-   * returns
-   * [0,    5,     100],
-   * [1,    null,  100],
-   * [2,    15,    100],
+   * input:  [[5,null,15], 100],
+   *         [null,        200]
+   * index: 0
+   * output: [0,    5,     100],
+   *         [1,    null,  100],
+   *         [2,    15,    100]
    * </pre>
    *
    * Note that null lists are not included in the resulting table, but nulls inside
@@ -1798,6 +1823,96 @@ public Table explodePosition(int index) {
     return new Table(explodePosition(nativeHandle, index));
   }
 
+  /**
+   * Explodes a list column's elements retaining any null entries or empty lists.
+   *
+   * Any list is exploded, which means the elements of the list in each row are expanded
+   * into new rows in the output. The corresponding rows for other columns in the input
+   * are duplicated.
+   *
+   * <pre>
+   * Example:
+   * input:  [[5,10,15], 100],
+   *         [[20,25],   200],
+   *         [[30],      300]
+   * index: 0
+   * output: [5,         100],
+   *         [10,        100],
+   *         [15,        100],
+   *         [20,        200],
+   *         [25,        200],
+   *         [30,        300]
+   * </pre>
+   *
+   * Nulls propagate in different ways depending on what is null.
+   * <pre>
+   *  input:  [[5,null,15], 100],
+   *          [null,        200]
+   * index: 0
+   * output:  [5,           100],
+   *          [null,        100],
+   *          [15,          100],
+   *          [null,        200]
+   * </pre>
+   * Note that null lists are not removed from the output; they appear as null entries,
+   * and nulls inside lists are pulled out and remain.
+   *
+   * @param index Column index to explode inside the table.
+   * @return A new table with explode_col exploded.
+   */
+  public Table explodeOuter(int index) {
+    assert 0 <= index && index < columns.length : "Column index is out of range";
+    assert columns[index].getType().equals(DType.LIST) : "Column to explode must be of type LIST";
+    return new Table(explodeOuter(nativeHandle, index));
+  }
+
+  /**
+   * Explodes a list column's elements retaining any null entries or empty lists and includes a
+   * position column.
+   *
+   * Any list is exploded, which means the elements of the list in each row are expanded into new rows
+   * in the output. The corresponding rows for other columns in the input are duplicated. A position
+   * column is added that has the index inside the original list for each row.
+   *
+   * <pre>
+   * Example:
+   * input:  [[5,10,15], 100],
+   *         [[20,25],   200],
+   *         [[30],      300]
+   * index: 0
+   * output: [0,   5,    100],
+   *         [1,   10,   100],
+   *         [2,   15,   100],
+   *         [0,   20,   200],
+   *         [1,   25,   200],
+   *         [0,   30,   300]
+   * </pre>
+   *
+   * Nulls and empty lists propagate as null entries in the result.
+   * <pre>
+   * input:  [[5,null,15], 100],
+   *         [null,        200],
+   *         [[],          300]
+   * index: 0
+   * output: [0,     5,    100],
+   *         [1,  null,    100],
+   *         [2,    15,    100],
+   *         [0,  null,    200],
+   *         [0,  null,    300]
+   * </pre>
+   *
+   * @param index Column index to explode inside the table.
+   * @return A new table with exploded value and position. The column order of return table is
+   *         [cols before explode_input, explode_position, explode_value, cols after explode_input].
+   */
+  public Table explodeOuterPosition(int index) {
+    assert 0 <= index && index < columns.length : "Column index is out of range";
+    assert columns[index].getType().equals(DType.LIST) : "Column to explode must be of type LIST";
+    return new Table(explodeOuterPosition(nativeHandle, index));
+  }
+
   /**
    * Gathers the rows of this table according to `gatherMap` such that row "i"
    * in the resulting table's columns will contain row "gatherMap[i]" from this table.
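The two methods added above behave like explode()/explodePosition() but keep null and empty lists as null output rows. A minimal, hedged sketch of calling them from Java follows; the list/integer values and the use of ColumnVector.fromLists with HostColumnVector type descriptors are illustrative assumptions, not part of this patch:

import java.util.Arrays;

import ai.rapids.cudf.ColumnVector;
import ai.rapids.cudf.DType;
import ai.rapids.cudf.HostColumnVector;
import ai.rapids.cudf.Table;

public class ExplodeOuterSketch {
  public static void main(String[] args) {
    HostColumnVector.ListType listOfInts =
        new HostColumnVector.ListType(true, new HostColumnVector.BasicType(true, DType.INT32));
    try (ColumnVector lists = ColumnVector.fromLists(listOfInts,
             Arrays.asList(5, 10, 15), Arrays.asList(20, 25), null);
         ColumnVector other = ColumnVector.fromInts(100, 200, 300);
         Table input = new Table(lists, other);
         // Unlike explode(), the null list row survives as a null entry in the output.
         Table outer = input.explodeOuter(0);
         // explodeOuterPosition() also inserts the within-list position before the exploded values.
         Table outerWithPos = input.explodeOuterPosition(0)) {
      System.out.println("explodeOuter rows: " + outer.getRowCount());           // 3 + 2 + 1
      System.out.println("explodeOuterPosition rows: " + outerWithPos.getRowCount());
    }
  }
}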
@@ -2587,15 +2702,31 @@ public Table leftAntiJoin(TableOperation rightJoinIndices) {
     }
 
     /**
-     * Hash partition a table into the specified number of partitions.
+     * Hash partition a table into the specified number of partitions. Uses the default MURMUR3
+     * hashing.
      * @param numberOfPartitions - number of partitions to use
      * @return - {@link PartitionedTable} - Table that exposes a limited functionality of the
      * {@link Table} class
      */
     public PartitionedTable hashPartition(int numberOfPartitions) {
+      return hashPartition(HashType.MURMUR3, numberOfPartitions);
+    }
+
+    /**
+     * Hash partition a table into the specified number of partitions.
+     * @param type the type of hash to use. Depending on the hash type, different restrictions
+     *             on the hash column(s) may apply. Only IDENTITY and MURMUR3 are guaranteed
+     *             to be supported.
+     * @param numberOfPartitions - number of partitions to use
+     * @return {@link PartitionedTable} - Table that exposes a limited functionality of the
+     * {@link Table} class
+     */
+    public PartitionedTable hashPartition(HashType type, int numberOfPartitions) {
       int[] partitionOffsets = new int[numberOfPartitions];
-      return new PartitionedTable(new Table(Table.hashPartition(operation.table.nativeHandle,
+      return new PartitionedTable(new Table(Table.hashPartition(
+          operation.table.nativeHandle,
           operation.indices,
+          type.nativeId,
           partitionOffsets.length,
           partitionOffsets)), partitionOffsets);
     }
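For reference, a hedged sketch of driving both partitioning paths added in Table.java; every API used here appears in this patch or the existing tests, while the column contents and partition counts are illustrative assumptions:

import ai.rapids.cudf.ColumnVector;
import ai.rapids.cudf.HashType;
import ai.rapids.cudf.PartitionedTable;
import ai.rapids.cudf.Table;

public class PartitionSketch {
  public static void main(String[] args) {
    try (ColumnVector data = ColumnVector.fromInts(1, 2, 3, 4, 5, 6);
         Table t = new Table(data);
         // Explicit partitioning: row i is sent to the partition given by map[i].
         ColumnVector map = ColumnVector.fromInts(0, 2, 0, 2, 1, 1);
         PartitionedTable byMap = t.partition(map, 3);
         // Hash partitioning, now with an explicit choice of hash function.
         PartitionedTable byHash = t.onColumns(0).hashPartition(HashType.MURMUR3, 3)) {
      // getPartitions() reports the starting row offset of each partition in the result.
      System.out.println(java.util.Arrays.toString(byMap.getPartitions()));
      System.out.println(java.util.Arrays.toString(byHash.getPartitions()));
    }
  }
}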
diff --git a/java/src/main/native/CMakeLists.txt b/java/src/main/native/CMakeLists.txt
index c1239fe69ea..ceafc75f840 100755
--- a/java/src/main/native/CMakeLists.txt
+++ b/java/src/main/native/CMakeLists.txt
@@ -17,10 +17,7 @@ cmake_minimum_required(VERSION 3.18 FATAL_ERROR)
 
 # Use GPU_ARCHS if it is defined
 if(DEFINED GPU_ARCHS)
-  unset(CMAKE_CUDA_ARCHITECTURES CACHE)
-  if(NOT "${GPU_ARCHS}" STREQUAL "ALL")
-    set(CMAKE_CUDA_ARCHITECTURES "${GPU_ARCHS}")
-  endif()
+  set(CMAKE_CUDA_ARCHITECTURES "${GPU_ARCHS}")
 endif()
 
 # If `CMAKE_CUDA_ARCHITECTURES` is not defined, build for all supported architectures. If
@@ -29,11 +26,10 @@ endif()
 
 # This needs to be run before enabling the CUDA language due to the default initialization behavior
 # of `CMAKE_CUDA_ARCHITECTURES`, https://gitlab.kitware.com/cmake/cmake/-/issues/21302
-if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
-  set(CUDF_BUILD_FOR_ALL_ARCHS TRUE)
+if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES OR CMAKE_CUDA_ARCHITECTURES STREQUAL "ALL")
+  set(CUDF_JNI_BUILD_FOR_ALL_ARCHS TRUE)
 elseif(CMAKE_CUDA_ARCHITECTURES STREQUAL "")
-  unset(CMAKE_CUDA_ARCHITECTURES CACHE)
-  set(CUDF_BUILD_FOR_DETECTED_ARCHS TRUE)
+  set(CUDF_JNI_BUILD_FOR_DETECTED_ARCHS TRUE)
 endif()
 
 project(CUDF_JNI VERSION 0.19 LANGUAGES C CXX)
diff --git a/java/src/main/native/src/ColumnViewJni.cpp b/java/src/main/native/src/ColumnViewJni.cpp
index e8474bda1be..ac14e1605d7 100644
--- a/java/src/main/native/src/ColumnViewJni.cpp
+++ b/java/src/main/native/src/ColumnViewJni.cpp
@@ -37,7 +37,6 @@
 #include 
 #include 
 #include 
-#include 
 #include 
 #include 
 #include 
@@ -60,6 +59,7 @@
 #include 
 #include 
 #include 
+#include "cudf/types.hpp"
 
 #include "cudf_jni_apis.hpp"
 #include "dtype_utils.hpp"
@@ -1026,6 +1026,18 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_binaryOpVV(JNIEnv *env, j
   CATCH_STD(env, 0);
 }
 
+JNIEXPORT jint JNICALL Java_ai_rapids_cudf_ColumnView_fixedPointOutputScale(JNIEnv *env, jclass,
+                                                                            jint int_op,
+                                                                            jint lhs_scale,
+                                                                            jint rhs_scale) {
+  try {
+    // only the scale needs to be computed here; the output type id matches the lhs input
+    return cudf::binary_operation_fixed_point_scale(static_cast<cudf::binary_operator>(int_op),
+                                                    lhs_scale, rhs_scale);
+  }
+  CATCH_STD(env, 0);
+}
+
 JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_binaryOpVS(JNIEnv *env, jclass,
                                                                   jlong lhs_view, jlong rhs_ptr,
                                                                   jint int_op, jint out_dtype,
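The fixedPointOutputScale JNI hook above is what the reworked BinaryOperable.implicitConversion calls so that decimal results carry a libcudf-computed scale instead of a hard-coded scale of 0. A hedged Java sketch of the effect; ColumnVector.decimalFromLongs and the chosen scales are assumptions used only for illustration:

import ai.rapids.cudf.ColumnVector;

public class DecimalScaleSketch {
  public static void main(String[] args) {
    // Two DECIMAL64 columns whose unscaled long values are interpreted with scale -2.
    try (ColumnVector a = ColumnVector.decimalFromLongs(-2, 12345L, 500L);
         ColumnVector b = ColumnVector.decimalFromLongs(-2, 100L, 250L);
         // add() now resolves its output DType via getFixedPointOutputScale(BinaryOp.ADD, ...).
         ColumnVector sum = a.add(b);
         // mul() resolves a different scale for the product, again computed by libcudf.
         ColumnVector product = a.mul(b)) {
      System.out.println("sum scale = " + sum.getType().getScale());
      System.out.println("product scale = " + product.getType().getScale());
    }
  }
}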
diff --git a/java/src/main/native/src/TableJni.cpp b/java/src/main/native/src/TableJni.cpp
index e051f68be4e..81b9882104f 100644
--- a/java/src/main/native/src/TableJni.cpp
+++ b/java/src/main/native/src/TableJni.cpp
@@ -27,6 +27,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -1613,9 +1614,43 @@ JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_concatenate(JNIEnv *env,
   CATCH_STD(env, NULL);
 }
 
+JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_partition(JNIEnv *env, jclass,
+                                                                 jlong input_table,
+                                                                 jlong partition_column,
+                                                                 jint number_of_partitions,
+                                                                 jintArray output_offsets) {
+
+  JNI_NULL_CHECK(env, input_table, "input table is null", NULL);
+  JNI_NULL_CHECK(env, partition_column, "partition_column is null", NULL);
+  JNI_NULL_CHECK(env, output_offsets, "output_offsets is null", NULL);
+  JNI_ARG_CHECK(env, number_of_partitions > 0, "number_of_partitions is zero", NULL);
+
+  try {
+    cudf::jni::auto_set_device(env);
+    cudf::table_view *n_input_table = reinterpret_cast<cudf::table_view *>(input_table);
+    cudf::column_view *n_part_column = reinterpret_cast<cudf::column_view *>(partition_column);
+    cudf::jni::native_jintArray n_output_offsets(env, output_offsets);
+
+    auto result = cudf::partition(*n_input_table,
+                                  *n_part_column,
+                                  number_of_partitions);
+
+    for (size_t i = 0; i < result.second.size() - 1; i++) {
+      // partition() appends the total row count as a final offset, while hash
+      // partition/round robin do not, so skip the last entry for consistency
+      n_output_offsets[i] = result.second[i];
+    }
+
+    return cudf::jni::convert_table_for_return(env, result.first);
+  }
+  CATCH_STD(env, NULL);
+}
+
 JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_hashPartition(JNIEnv *env, jclass,
                                                                      jlong input_table,
                                                                      jintArray columns_to_hash,
+                                                                     jint hash_function,
                                                                      jint number_of_partitions,
                                                                      jintArray output_offsets) {
 
@@ -1626,6 +1661,7 @@ JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_hashPartition(JNIEnv *env
 
   try {
     cudf::jni::auto_set_device(env);
+    cudf::hash_id hash_func = static_cast<cudf::hash_id>(hash_function);
     cudf::table_view *n_input_table = reinterpret_cast<cudf::table_view *>(input_table);
     cudf::jni::native_jintArray n_columns_to_hash(env, columns_to_hash);
     cudf::jni::native_jintArray n_output_offsets(env, output_offsets);
@@ -1638,7 +1674,10 @@ JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_hashPartition(JNIEnv *env
     }
 
     std::pair<std::unique_ptr<cudf::table>, std::vector<cudf::size_type>> result =
-        cudf::hash_partition(*n_input_table, columns_to_hash_vec, number_of_partitions);
+        cudf::hash_partition(*n_input_table,
+                             columns_to_hash_vec,
+                             number_of_partitions,
+                             hash_func);
 
     for (size_t i = 0; i < result.second.size(); i++) {
       n_output_offsets[i] = result.second[i];
@@ -2046,4 +2085,32 @@ JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_explodePosition(JNIEnv *e
   CATCH_STD(env, 0);
 }
 
+JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_explodeOuter(JNIEnv *env, jclass,
+                                                                    jlong input_jtable,
+                                                                    jint column_index) {
+  JNI_NULL_CHECK(env, input_jtable, "explode: input table is null", 0);
+  try {
+    cudf::jni::auto_set_device(env);
+    cudf::table_view *input_table = reinterpret_cast<cudf::table_view *>(input_jtable);
+    cudf::size_type col_index = static_cast<cudf::size_type>(column_index);
+    std::unique_ptr<cudf::table> exploded = cudf::explode_outer(*input_table, col_index);
+    return cudf::jni::convert_table_for_return(env, exploded);
+  }
+  CATCH_STD(env, 0);
+}
+
+JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_explodeOuterPosition(JNIEnv *env, jclass,
+                                                                            jlong input_jtable,
+                                                                            jint column_index) {
+  JNI_NULL_CHECK(env, input_jtable, "explode: input table is null", 0);
+  try {
+    cudf::jni::auto_set_device(env);
+    cudf::table_view *input_table = reinterpret_cast<cudf::table_view *>(input_jtable);
+    cudf::size_type col_index = static_cast<cudf::size_type>(column_index);
+    std::unique_ptr<cudf::table> exploded = cudf::explode_outer_position(*input_table, col_index);
+    return cudf::jni::convert_table_for_return(env, exploded);
+  }
+  CATCH_STD(env, 0);
+}
+
 } // extern "C"
diff --git a/java/src/test/java/ai/rapids/cudf/TableTest.java b/java/src/test/java/ai/rapids/cudf/TableTest.java
index 88196a4112a..c075f074068 100644
--- a/java/src/test/java/ai/rapids/cudf/TableTest.java
+++ b/java/src/test/java/ai/rapids/cudf/TableTest.java
@@ -1742,7 +1742,7 @@ void testPartStability() {
     final int PARTS = 5;
     int expectedPart = -1;
     try (Table start = new Table.TestBuilder().column(0).build();
-         PartitionedTable out = start.onColumns(0).partition(PARTS)) {
+         PartitionedTable out = start.onColumns(0).hashPartition(PARTS)) {
       // Lets figure out what partitions this is a part of.
       int[] parts = out.getPartitions();
       for (int i = 0; i < parts.length; i++) {
@@ -1755,7 +1755,7 @@ void testPartStability() {
     for (int numEntries = 1; numEntries < COUNT; numEntries++) {
       try (ColumnVector data = ColumnVector.build(DType.INT32, numEntries, Range.appendInts(0, numEntries));
            Table t = new Table(data);
-           PartitionedTable out = t.onColumns(0).partition(PARTS);
+           PartitionedTable out = t.onColumns(0).hashPartition(PARTS);
            HostColumnVector tmp = out.getColumn(0).copyToHost()) {
         // Now we need to get the range out for the partition we expect
         int[] parts = out.getPartitions();
@@ -1775,6 +1775,23 @@ void testPartStability() {
 
   @Test
   void testPartition() {
+    try (Table t = new Table.TestBuilder()
+        .column(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
+        .build();
+         ColumnVector parts = ColumnVector
+             .fromInts(1, 2, 1, 2, 1, 2, 1, 2, 1, 2);
+         PartitionedTable pt = t.partition(parts, 3);
+         Table expected = new Table.TestBuilder()
+             .column(1, 3, 5, 7, 9, 2, 4, 6, 8, 10)
+             .build()) {
+      int[] partCutoffs = pt.getPartitions();
+      assertArrayEquals(new int[]{0, 0, 5}, partCutoffs);
+      assertTablesAreEqual(expected, pt.getTable());
+    }
+  }
+
+  @Test
+  void testIdentityHashPartition() {
     final int count = 1024 * 1024;
     try (ColumnVector aIn = ColumnVector.build(DType.INT64, count, Range.appendLongs(count));
          ColumnVector bIn = ColumnVector.build(DType.INT32, count, (b) -> {
@@ -1793,7 +1810,57 @@ void testPartition() {
         expected.add(i);
       }
       try (Table input = new Table(new ColumnVector[]{aIn, bIn, cIn});
-           PartitionedTable output = input.onColumns(0).partition(5)) {
+           PartitionedTable output = input.onColumns(0).hashPartition(HashType.IDENTITY, 5)) {
+        int[] parts = output.getPartitions();
+        assertEquals(5, parts.length);
+        assertEquals(0, parts[0]);
+        int previous = 0;
+        long rows = 0;
+        for (int i = 1; i < parts.length; i++) {
+          assertTrue(parts[i] >= previous);
+          rows += parts[i] - previous;
+          previous = parts[i];
+        }
+        assertTrue(rows <= count);
+        try (HostColumnVector aOut = output.getColumn(0).copyToHost();
+             HostColumnVector bOut = output.getColumn(1).copyToHost();
+             HostColumnVector cOut = output.getColumn(2).copyToHost()) {
+
+          for (int i = 0; i < count; i++) {
+            long fromA = aOut.getLong(i);
+            long fromB = bOut.getInt(i);
+            String fromC = cOut.getJavaString(i);
+            assertTrue(expected.remove(fromA));
+            assertEquals(fromA / 2, fromB);
+            assertEquals(String.valueOf(fromA), fromC, "At Index " + i);
+          }
+          assertTrue(expected.isEmpty());
+        }
+      }
+    }
+  }
+
+  @Test
+  void testHashPartition() {
+    final int count = 1024 * 1024;
+    try (ColumnVector aIn = ColumnVector.build(DType.INT64, count, Range.appendLongs(count));
+         ColumnVector bIn = ColumnVector.build(DType.INT32, count, (b) -> {
+           for (int i = 0; i < count; i++) {
+             b.append(i / 2);
+           }
+         });
+         ColumnVector cIn = ColumnVector.build(DType.STRING, count, (b) -> {
+           for (int i = 0; i < count; i++) {
+             b.appendUTF8String(String.valueOf(i).getBytes());
+           }
+         })) {
+
+      HashSet<Long> expected = new HashSet<>();
+      for (long i = 0; i < count; i++) {
+        expected.add(i);
+      }
+      try (Table input = new Table(new ColumnVector[]{aIn, bIn, cIn});
+           PartitionedTable output = input.onColumns(0).hashPartition(5)) {
         int[] parts = output.getPartitions();
         assertEquals(5, parts.length);
         assertEquals(0, parts[0]);
@@ -4585,7 +4652,7 @@ private Table[] buildExplodeTestTableWithPrimitiveTypes(boolean pos, boolean out
     }
   }
 
-  private Table[] buildExplodeTestTableWithNestedTypes(boolean pos) {
+  private Table[] buildExplodeTestTableWithNestedTypes(boolean pos, boolean outer) {
     StructType nestedType = new StructType(true,
         new BasicType(false, DType.INT32), new BasicType(false, DType.STRING));
     try (Table input = new Table.TestBuilder()
@@ -4594,23 +4661,42 @@ private Table[] buildExplodeTestTableWithNestedTypes(boolean pos) {
             Arrays.asList(struct(4, "k4"), struct(5, "k5")),
             Arrays.asList(struct(6, "k6")),
             Arrays.asList(new HostColumnVector.StructData((List) null)),
-            Arrays.asList())
+            null)
         .column("s1", "s2", "s3", "s4", "s5")
         .column(1, 3, 5, 7, 9)
         .column(12.0, 14.0, 13.0, 11.0, 15.0)
         .build()) {
       Table.TestBuilder expectedBuilder = new Table.TestBuilder();
       if (pos) {
-        expectedBuilder.column(0, 1, 2, 0, 1, 0, 0);
+        if (!outer)
+          expectedBuilder.column(0, 1, 2, 0, 1, 0, 0);
+        else
+          expectedBuilder.column(0, 1, 2, 0, 1, 0, 0, 0);
       }
-      try (Table expected = expectedBuilder
-          .column(nestedType,
+      List<Object[]> expectedData = new ArrayList<Object[]>(){{
+        if (!outer) {
+          this.add(new HostColumnVector.StructData[]{
               struct(1, "k1"), struct(2, "k2"), struct(3, "k3"),
               struct(4, "k4"), struct(5, "k5"), struct(6, "k6"),
-              new HostColumnVector.StructData((List) null))
-          .column("s1", "s1", "s1", "s2", "s2", "s3", "s4")
-          .column(1, 1, 1, 3, 3, 5, 7)
-          .column(12.0, 12.0, 12.0, 14.0, 14.0, 13.0, 11.0)
+              new HostColumnVector.StructData((List) null)});
+          this.add(new String[]{"s1", "s1", "s1", "s2", "s2", "s3", "s4"});
+          this.add(new Integer[]{1, 1, 1, 3, 3, 5, 7});
+          this.add(new Double[]{12.0, 12.0, 12.0, 14.0, 14.0, 13.0, 11.0});
+        } else {
+          this.add(new HostColumnVector.StructData[]{
+              struct(1, "k1"), struct(2, "k2"), struct(3, "k3"),
+              struct(4, "k4"), struct(5, "k5"), struct(6, "k6"),
+              new HostColumnVector.StructData((List) null), null});
+          this.add(new String[]{"s1", "s1", "s1", "s2", "s2", "s3", "s4", "s5"});
+          this.add(new Integer[]{1, 1, 1, 3, 3, 5, 7, 9});
+          this.add(new Double[]{12.0, 12.0, 12.0, 14.0, 14.0, 13.0, 11.0, 15.0});
+        }
+      }};
+      try (Table expected = expectedBuilder
+          .column(nestedType, (HostColumnVector.StructData[]) expectedData.get(0))
+          .column((String[]) expectedData.get(1))
+          .column((Integer[]) expectedData.get(2))
+          .column((Double[]) expectedData.get(3))
           .build()) {
         return new Table[]{new Table(input.getColumns()), new Table(expected.getColumns())};
       }
@@ -4629,7 +4715,7 @@ void testExplode() {
     }
 
     // Child is nested type
-    Table[] testTables2 = buildExplodeTestTableWithNestedTypes(false);
+    Table[] testTables2 = buildExplodeTestTableWithNestedTypes(false, false);
     try (Table input = testTables2[0];
          Table expected = testTables2[1]) {
       try (Table exploded = input.explode(0)) {
@@ -4639,7 +4725,7 @@ void testExplode() {
   }
 
   @Test
-  void testPosExplode() {
+  void testExplodePosition() {
     // Child is primitive type
     Table[] testTables = buildExplodeTestTableWithPrimitiveTypes(true, false);
     try (Table input = testTables[0];
@@ -4649,8 +4735,8 @@ void testPosExplode() {
       }
     }
 
-    // Child is primitive type
-    Table[] testTables2 = buildExplodeTestTableWithNestedTypes(true);
+    // Child is nested type
+    Table[] testTables2 = buildExplodeTestTableWithNestedTypes(true, false);
     try (Table input = testTables2[0];
          Table expected = testTables2[1]) {
       try (Table exploded = input.explodePosition(0)) {
@@ -4659,4 +4745,45 @@ void testPosExplode() {
     }
   }
 
+  @Test
+  void testExplodeOuter() {
+    // Child is primitive type
+    Table[] testTables = buildExplodeTestTableWithPrimitiveTypes(false, true);
+    try (Table input = testTables[0];
+         Table expected = testTables[1]) {
+      try (Table exploded = input.explodeOuter(0)) {
+        assertTablesAreEqual(expected, exploded);
+      }
+    }
+
+    // Child is nested type
+    Table[] testTables2 = buildExplodeTestTableWithNestedTypes(false, true);
+    try (Table input = testTables2[0];
+         Table expected = testTables2[1]) {
+      try (Table exploded = input.explodeOuter(0)) {
+        assertTablesAreEqual(expected, exploded);
+      }
+    }
+  }
+
+  @Test
+  void testExplodeOuterPosition() {
+    // Child is primitive type
+    Table[] testTables = buildExplodeTestTableWithPrimitiveTypes(true, true);
+    try (Table input = testTables[0];
+         Table expected = testTables[1]) {
+      try (Table exploded = input.explodeOuterPosition(0)) {
+        assertTablesAreEqual(expected, exploded);
+      }
+    }
+
+    // Child is nested type
+    Table[] testTables2 = buildExplodeTestTableWithNestedTypes(true, true);
+    try (Table input = testTables2[0];
+         Table expected = testTables2[1]) {
+      try (Table exploded = input.explodeOuterPosition(0)) {
+        assertTablesAreEqual(expected, exploded);
+      }
+    }
+  }
 }
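Tying back to the JCudfSerialization change earlier in this patch: concatToContiguousTable() is now a thin wrapper over concatToHostBuffer() plus HostConcatResult.toContiguousTable(), so callers can keep the concatenated result in host memory until a device copy is actually needed. A hedged sketch of that usage; the helper name and the choice of when to move data to the device are illustrative assumptions:

import java.io.IOException;

import ai.rapids.cudf.ContiguousTable;
import ai.rapids.cudf.HostMemoryBuffer;
import ai.rapids.cudf.JCudfSerialization;
import ai.rapids.cudf.JCudfSerialization.HostConcatResult;
import ai.rapids.cudf.JCudfSerialization.SerializedTableHeader;

public class HostConcatSketch {
  // Concatenate serialized table chunks on the host, copying to the GPU only when needed.
  static long concatAndCountRows(SerializedTableHeader[] headers,
                                 HostMemoryBuffer[] buffers) throws IOException {
    try (HostConcatResult hostResult = JCudfSerialization.concatToHostBuffer(headers, buffers);
         ContiguousTable deviceTable = hostResult.toContiguousTable()) {
      // The header and host buffer stay accessible, e.g. for spilling or size accounting.
      System.out.println("host-side bytes: " + hostResult.getHostBuffer().getLength());
      return deviceTable.getTable().getRowCount();
    }
  }
}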
diff --git a/python/cudf/cudf/_lib/binaryop.pyx b/python/cudf/cudf/_lib/binaryop.pyx
index 59a6b876961..5eaec640b15 100644
--- a/python/cudf/cudf/_lib/binaryop.pyx
+++ b/python/cudf/cudf/_lib/binaryop.pyx
@@ -93,6 +93,9 @@ class BinaryOperation(IntEnum):
     GENERIC_BINARY = (
          binary_operator.GENERIC_BINARY
     )
+    NULL_EQUALS = (
+         binary_operator.NULL_EQUALS
+    )
 
 
 cdef binaryop_v_v(Column lhs, Column rhs,
@@ -154,17 +157,6 @@ cdef binaryop_s_v(DeviceScalar lhs, Column rhs,
     return Column.from_unique_ptr(move(c_result))
 
 
-def handle_null_for_string_column(Column input_col, op):
-    if op in ('eq', 'lt', 'le', 'gt', 'ge'):
-        return replace_nulls(input_col, DeviceScalar(False, 'bool'))
-
-    elif op == 'ne':
-        return replace_nulls(input_col, DeviceScalar(True, 'bool'))
-
-    # Nothing needs to be done
-    return input_col
-
-
 def binaryop(lhs, rhs, op, dtype):
     """
     Dispatches a binary op call to the appropriate libcudf function:
@@ -205,11 +197,7 @@ def binaryop(lhs, rhs, op, dtype):
             c_op,
             c_dtype
         )
-
-    if is_string_col is True:
-        return handle_null_for_string_column(result, op.name.lower())
-    else:
-        return result
+    return result
 
 
 def binaryop_udf(Column lhs, Column rhs, udf_ptx, dtype):
diff --git a/python/cudf/cudf/_lib/copying.pyx b/python/cudf/cudf/_lib/copying.pyx
index ad798a73ed2..e5501428624 100644
--- a/python/cudf/cudf/_lib/copying.pyx
+++ b/python/cudf/cudf/_lib/copying.pyx
@@ -3,7 +3,7 @@
 import pandas as pd
 
 from libcpp cimport bool
-from libcpp.memory cimport make_unique, unique_ptr
+from libcpp.memory cimport make_unique, unique_ptr, shared_ptr, make_shared
 from libcpp.vector cimport vector
 from libcpp.utility cimport move
 from libc.stdint cimport int32_t, int64_t
@@ -24,6 +24,10 @@ from cudf._lib.cpp.scalar.scalar cimport scalar
 from cudf._lib.cpp.table.table cimport table
 from cudf._lib.cpp.table.table_view cimport table_view
 from cudf._lib.cpp.types cimport size_type
+from cudf._lib.cpp.lists.lists_column_view cimport lists_column_view
+from cudf._lib.cpp.lists.gather cimport (
+    segmented_gather as cpp_segmented_gather
+)
 cimport cudf._lib.cpp.copying as cpp_copying
 
 # workaround for https://github.com/cython/cython/issues/3885
@@ -704,3 +708,22 @@ def sample(Table input, size_type n,
             else input._index_names
         )
     )
+
+
+def segmented_gather(Column source_column, Column gather_map):
+    cdef shared_ptr[lists_column_view] source_LCV = (
+        make_shared[lists_column_view](source_column.view())
+    )
+    cdef shared_ptr[lists_column_view] gather_map_LCV = (
+        make_shared[lists_column_view](gather_map.view())
+    )
+    cdef unique_ptr[column] c_result
+
+    with nogil:
+        c_result = move(
+            cpp_segmented_gather(
+                source_LCV.get()[0], gather_map_LCV.get()[0])
+        )
+
+    result = Column.from_unique_ptr(move(c_result))
+    return result
diff --git a/python/cudf/cudf/_lib/cpp/binaryop.pxd b/python/cudf/cudf/_lib/cpp/binaryop.pxd
index fb36fdfd639..2e36070a164 100644
--- a/python/cudf/cudf/_lib/cpp/binaryop.pxd
+++ b/python/cudf/cudf/_lib/cpp/binaryop.pxd
@@ -27,6 +27,7 @@ cdef extern from "cudf/binaryop.hpp" namespace "cudf" nogil:
         GREATER "cudf::binary_operator::GREATER"
         LESS_EQUAL "cudf::binary_operator::LESS_EQUAL"
         GREATER_EQUAL "cudf::binary_operator::GREATER_EQUAL"
+        NULL_EQUALS "cudf::binary_operator::NULL_EQUALS"
         BITWISE_AND "cudf::binary_operator::BITWISE_AND"
         BITWISE_OR "cudf::binary_operator::BITWISE_OR"
         BITWISE_XOR "cudf::binary_operator::BITWISE_XOR"
diff --git a/python/cudf/cudf/_lib/cpp/lists/gather.pxd b/python/cudf/cudf/_lib/cpp/lists/gather.pxd
new file mode 100644
index 00000000000..ea664eee82e
--- /dev/null
+++ b/python/cudf/cudf/_lib/cpp/lists/gather.pxd
@@ -0,0 +1,13 @@
+# Copyright (c) 2021, NVIDIA CORPORATION.
+
+from libcpp.memory cimport unique_ptr
+
+from cudf._lib.cpp.column.column cimport column
+from cudf._lib.cpp.lists.lists_column_view cimport lists_column_view
+
+
+cdef extern from "cudf/lists/gather.hpp" namespace "cudf::lists" nogil:
+    cdef unique_ptr[column] segmented_gather(
+        const lists_column_view source_column,
+        const lists_column_view gather_map_list
+    ) except +
diff --git a/python/cudf/cudf/_lib/cpp/strings/char_types.pxd b/python/cudf/cudf/_lib/cpp/strings/char_types.pxd
index ad675027c10..934269c6f25 100644
--- a/python/cudf/cudf/_lib/cpp/strings/char_types.pxd
+++ b/python/cudf/cudf/_lib/cpp/strings/char_types.pxd
@@ -1,4 +1,4 @@
-# Copyright (c) 2020, NVIDIA CORPORATION.
+# Copyright (c) 2021, NVIDIA CORPORATION.
 
 from libcpp.memory cimport unique_ptr
 from cudf._lib.cpp.column.column_view cimport column_view
@@ -33,11 +33,3 @@ cdef extern from "cudf/strings/char_types/char_types.hpp" \
         string_character_types types_to_remove,
         string_scalar replacement,
         string_character_types types_to_keep) except +
-
-    cdef unique_ptr[column] is_integer(
-        column_view source_strings
-    ) except +
-
-    cdef unique_ptr[column] is_float(
-        column_view source_strings
-    ) except +
diff --git a/python/cudf/cudf/_lib/cpp/strings/convert/convert_floats.pxd b/python/cudf/cudf/_lib/cpp/strings/convert/convert_floats.pxd
index baee01b8f99..55a84b60efd 100644
--- a/python/cudf/cudf/_lib/cpp/strings/convert/convert_floats.pxd
+++ b/python/cudf/cudf/_lib/cpp/strings/convert/convert_floats.pxd
@@ -1,4 +1,4 @@
-# Copyright (c) 2020, NVIDIA CORPORATION.
+# Copyright (c) 2021, NVIDIA CORPORATION.
 
 from cudf._lib.cpp.column.column cimport column
 from cudf._lib.cpp.column.column_view cimport column_view
@@ -14,3 +14,7 @@ cdef extern from "cudf/strings/convert/convert_floats.hpp" namespace \
 
     cdef unique_ptr[column] from_floats(
         column_view input_col) except +
+
+    cdef unique_ptr[column] is_float(
+        column_view source_strings
+    ) except +
diff --git a/python/cudf/cudf/_lib/cpp/strings/convert/convert_integers.pxd b/python/cudf/cudf/_lib/cpp/strings/convert/convert_integers.pxd
index 92f99a2f5cb..6e45d4ba869 100644
--- a/python/cudf/cudf/_lib/cpp/strings/convert/convert_integers.pxd
+++ b/python/cudf/cudf/_lib/cpp/strings/convert/convert_integers.pxd
@@ -1,4 +1,4 @@
-# Copyright (c) 2020, NVIDIA CORPORATION.
+# Copyright (c) 2021, NVIDIA CORPORATION.
 
 from cudf._lib.cpp.column.column cimport column
 from cudf._lib.cpp.column.column_view cimport column_view
@@ -15,6 +15,10 @@ cdef extern from "cudf/strings/convert/convert_integers.hpp" namespace \
     cdef unique_ptr[column] from_integers(
         column_view input_col) except +
 
+    cdef unique_ptr[column] is_integer(
+        column_view source_strings
+    ) except +
+
     cdef unique_ptr[column] hex_to_integers(
         column_view input_col,
         data_type output_type) except +
diff --git a/python/cudf/cudf/_lib/reduce.pyx b/python/cudf/cudf/_lib/reduce.pyx
index 7b455dd574b..2185cb089a7 100644
--- a/python/cudf/cudf/_lib/reduce.pyx
+++ b/python/cudf/cudf/_lib/reduce.pyx
@@ -57,6 +57,8 @@ def reduce(reduction_op, Column incol, dtype=None, **kwargs):
             return incol.dtype.type(0)
         if reduction_op == 'product':
             return incol.dtype.type(1)
+        if reduction_op == "any":
+            return False
 
         return cudf.utils.dtypes._get_nan_for_dtype(col_dtype)
 
diff --git a/python/cudf/cudf/_lib/strings/char_types.pyx b/python/cudf/cudf/_lib/strings/char_types.pyx
index 5d8d1522418..1890e98f956 100644
--- a/python/cudf/cudf/_lib/strings/char_types.pyx
+++ b/python/cudf/cudf/_lib/strings/char_types.pyx
@@ -1,4 +1,4 @@
-# Copyright (c) 2020, NVIDIA CORPORATION.
+# Copyright (c) 2021, NVIDIA CORPORATION.
 
 from libcpp cimport bool
 from libcpp.memory cimport unique_ptr
@@ -14,8 +14,6 @@ from cudf._lib.cpp.strings.char_types cimport (
     all_characters_of_type as cpp_all_characters_of_type,
     filter_characters_of_type as cpp_filter_characters_of_type,
     string_character_types as string_character_types,
-    is_integer as cpp_is_integer,
-    is_float as cpp_is_float,
 )
 
 
@@ -191,35 +189,3 @@ def is_space(Column source_strings):
         ))
 
     return Column.from_unique_ptr(move(c_result))
-
-
-def is_integer(Column source_strings):
-    """
-    Returns a Column of boolean values with True for `source_strings`
-    that have intergers.
-    """
-    cdef unique_ptr[column] c_result
-    cdef column_view source_view = source_strings.view()
-
-    with nogil:
-        c_result = move(cpp_is_integer(
-            source_view
-        ))
-
-    return Column.from_unique_ptr(move(c_result))
-
-
-def is_float(Column source_strings):
-    """
-    Returns a Column of boolean values with True for `source_strings`
-    that have floats.
-    """
-    cdef unique_ptr[column] c_result
-    cdef column_view source_view = source_strings.view()
-
-    with nogil:
-        c_result = move(cpp_is_float(
-            source_view
-        ))
-
-    return Column.from_unique_ptr(move(c_result))
diff --git a/python/cudf/cudf/_lib/strings/convert/convert_floats.pyx b/python/cudf/cudf/_lib/strings/convert/convert_floats.pyx
new file mode 100644
index 00000000000..195d9b71f6e
--- /dev/null
+++ b/python/cudf/cudf/_lib/strings/convert/convert_floats.pyx
@@ -0,0 +1,29 @@
+# Copyright (c) 2021, NVIDIA CORPORATION.
+
+from libcpp cimport bool
+from libcpp.memory cimport unique_ptr
+from libcpp.utility cimport move
+
+from cudf._lib.cpp.column.column_view cimport column_view
+from cudf._lib.column cimport Column
+from cudf._lib.cpp.column.column cimport column
+
+from cudf._lib.cpp.strings.convert.convert_floats cimport (
+    is_float as cpp_is_float,
+)
+
+
+def is_float(Column source_strings):
+    """
+    Returns a Column of boolean values with True for `source_strings`
+    that have floats.
+    """
+    cdef unique_ptr[column] c_result
+    cdef column_view source_view = source_strings.view()
+
+    with nogil:
+        c_result = move(cpp_is_float(
+            source_view
+        ))
+
+    return Column.from_unique_ptr(move(c_result))
diff --git a/python/cudf/cudf/_lib/strings/convert/convert_integers.pyx b/python/cudf/cudf/_lib/strings/convert/convert_integers.pyx
new file mode 100644
index 00000000000..d1bae1edd37
--- /dev/null
+++ b/python/cudf/cudf/_lib/strings/convert/convert_integers.pyx
@@ -0,0 +1,29 @@
+# Copyright (c) 2021, NVIDIA CORPORATION.
+
+from libcpp cimport bool
+from libcpp.memory cimport unique_ptr
+from libcpp.utility cimport move
+
+from cudf._lib.cpp.column.column_view cimport column_view
+from cudf._lib.column cimport Column
+from cudf._lib.cpp.column.column cimport column
+
+from cudf._lib.cpp.strings.convert.convert_integers cimport (
+    is_integer as cpp_is_integer,
+)
+
+
+def is_integer(Column source_strings):
+    """
+    Returns a Column of boolean values with True for `source_strings`
+    that have integers.
+    """
+    cdef unique_ptr[column] c_result
+    cdef column_view source_view = source_strings.view()
+
+    with nogil:
+        c_result = move(cpp_is_integer(
+            source_view
+        ))
+
+    return Column.from_unique_ptr(move(c_result))
diff --git a/python/cudf/cudf/core/column/categorical.py b/python/cudf/cudf/core/column/categorical.py
index c41a458f02b..39c278d2abf 100644
--- a/python/cudf/cudf/core/column/categorical.py
+++ b/python/cudf/cudf/core/column/categorical.py
@@ -1014,7 +1014,11 @@ def slice(
     def binary_operator(
         self, op: str, rhs, reflect: bool = False
     ) -> ColumnBase:
-        if not (self.ordered and rhs.ordered) and op not in ("eq", "ne"):
+        if not (self.ordered and rhs.ordered) and op not in (
+            "eq",
+            "ne",
+            "NULL_EQUALS",
+        ):
             if op in ("lt", "gt", "le", "ge"):
                 raise TypeError(
                     "Unordered Categoricals can only compare equality or not"
diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py
index 2bb35c97d7c..b2b2874eeb4 100644
--- a/python/cudf/cudf/core/column/column.py
+++ b/python/cudf/cudf/core/column/column.py
@@ -179,7 +179,11 @@ def equals(self, other: ColumnBase, check_dtypes: bool = False) -> bool:
         if check_dtypes:
             if self.dtype != other.dtype:
                 return False
-        return (self == other).min()
+        null_equals = self._null_equals(other)
+        return null_equals.all()
+
+    def _null_equals(self, other: ColumnBase) -> ColumnBase:
+        return self.binary_operator("NULL_EQUALS", other)
 
     def all(self) -> bool:
         return bool(libcudf.reduce.reduce("all", self, dtype=np.bool_))
diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py
index 7c5385b9bbf..a563248f4ab 100644
--- a/python/cudf/cudf/core/column/datetime.py
+++ b/python/cudf/cudf/core/column/datetime.py
@@ -274,7 +274,7 @@ def binary_operator(
         if isinstance(rhs, cudf.DateOffset):
             return binop_offset(self, rhs, op)
         lhs, rhs = self, rhs
-        if op in ("eq", "ne", "lt", "gt", "le", "ge"):
+        if op in ("eq", "ne", "lt", "gt", "le", "ge", "NULL_EQUALS"):
             out_dtype = np.dtype(np.bool_)  # type: Dtype
         elif op == "add" and pd.api.types.is_timedelta64_dtype(rhs.dtype):
             out_dtype = cudf.core.column.timedelta._timedelta_add_result_dtype(
diff --git a/python/cudf/cudf/core/column/lists.py b/python/cudf/cudf/core/column/lists.py
index a60fe627acb..1d3f73822a9 100644
--- a/python/cudf/cudf/core/column/lists.py
+++ b/python/cudf/cudf/core/column/lists.py
@@ -2,14 +2,16 @@
 
 import pickle
 
+import numpy as np
 import pyarrow as pa
 
 import cudf
+from cudf._lib.copying import segmented_gather
 from cudf._lib.lists import count_elements
 from cudf.core.buffer import Buffer
-from cudf.core.column import ColumnBase, column
+from cudf.core.column import ColumnBase, as_column, column
 from cudf.core.column.methods import ColumnMethodsMixin
-from cudf.utils.dtypes import is_list_dtype
+from cudf.utils.dtypes import is_list_dtype, is_numerical_dtype
 
 
 class ListColumn(ColumnBase):
@@ -228,3 +230,58 @@ def len(self):
         dtype: int32
         """
         return self._return_or_inplace(count_elements(self._column))
+
+    def take(self, lists_indices):
+        """
+        Collect list elements based on given indices.
+
+        Parameters
+        ----------
+        lists_indices: List type arrays
+            Specifies what to collect from each row
+
+        Returns
+        -------
+        ListColumn
+
+        Examples
+        --------
+        >>> s = cudf.Series([[1, 2, 3], None, [4, 5]])
+        >>> s
+        0    [1, 2, 3]
+        1         None
+        2       [4, 5]
+        dtype: list
+        >>> s.list.take([[0, 1], [], []])
+        0    [1, 2]
+        1      None
+        2        []
+        dtype: list
+        """
+
+        lists_indices_col = as_column(lists_indices)
+        if not isinstance(lists_indices_col, ListColumn):
+            raise ValueError("lists_indices should be list type array.")
+        if not lists_indices_col.size == self._column.size:
+            raise ValueError(
+                "lists_indices and list column is of different " "size."
+            )
+        if not is_numerical_dtype(
+            lists_indices_col.children[1].dtype
+        ) or not np.issubdtype(
+            lists_indices_col.children[1].dtype, np.integer
+        ):
+            raise TypeError(
+                "lists_indices should be column of values of index types."
+            )
+
+        try:
+            res = self._return_or_inplace(
+                segmented_gather(self._column, lists_indices_col)
+            )
+        except RuntimeError as e:
+            if "contains nulls" in str(e):
+                raise ValueError("lists_indices contains null.") from e
+            raise
+        else:
+            return res
diff --git a/python/cudf/cudf/core/column/numerical.py b/python/cudf/cudf/core/column/numerical.py
index 6fae8c644e3..7ad6eed65a8 100644
--- a/python/cudf/cudf/core/column/numerical.py
+++ b/python/cudf/cudf/core/column/numerical.py
@@ -700,16 +700,21 @@ def _numeric_column_binop(
     if reflect:
         lhs, rhs = rhs, lhs
 
-    is_op_comparison = op in ["lt", "gt", "le", "ge", "eq", "ne"]
+    is_op_comparison = op in [
+        "lt",
+        "gt",
+        "le",
+        "ge",
+        "eq",
+        "ne",
+        "NULL_EQUALS",
+    ]
 
     if is_op_comparison:
         out_dtype = "bool"
 
     out = libcudf.binaryop.binaryop(lhs, rhs, op, out_dtype)
 
-    if is_op_comparison:
-        out = out.fillna(op == "ne")
-
     return out
 
 
diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py
index 81abdd3f66a..11dd7556812 100644
--- a/python/cudf/cudf/core/column/string.py
+++ b/python/cudf/cudf/core/column/string.py
@@ -70,13 +70,15 @@
     is_alpha as cpp_is_alpha,
     is_decimal as cpp_is_decimal,
     is_digit as cpp_is_digit,
-    is_float as cpp_is_float,
-    is_integer as cpp_is_integer,
     is_lower as cpp_is_lower,
     is_numeric as cpp_is_numeric,
     is_space as cpp_isspace,
     is_upper as cpp_is_upper,
 )
+from cudf._lib.strings.convert.convert_integers import (
+    is_integer as cpp_is_integer,
+)
+from cudf._lib.strings.convert.convert_floats import is_float as cpp_is_float
 from cudf._lib.strings.combine import (
     concatenate as cpp_concatenate,
     join as cpp_join,
@@ -434,7 +436,6 @@ def cat(self, others=None, sep=None, na_rep=None):
         3    dD
         dtype: object
         """
-
         if sep is None:
             sep = ""
 
@@ -5109,7 +5110,7 @@ def binary_operator(
         if isinstance(rhs, (StringColumn, str, cudf.Scalar)):
             if op == "add":
                 return cast("column.ColumnBase", lhs.str().cat(others=rhs))
-            elif op in ("eq", "ne", "gt", "lt", "ge", "le"):
+            elif op in ("eq", "ne", "gt", "lt", "ge", "le", "NULL_EQUALS"):
                 return _string_column_binop(self, rhs, op=op, out_dtype="bool")
 
         raise TypeError(
diff --git a/python/cudf/cudf/core/column/timedelta.py b/python/cudf/cudf/core/column/timedelta.py
index ac63192b692..e22b511db01 100644
--- a/python/cudf/cudf/core/column/timedelta.py
+++ b/python/cudf/cudf/core/column/timedelta.py
@@ -223,7 +223,7 @@ def binary_operator(
 
         if op in ("eq", "ne"):
             out_dtype = self._binary_op_eq_ne(rhs)
-        elif op in ("lt", "gt", "le", "ge"):
+        elif op in ("lt", "gt", "le", "ge", "NULL_EQUALS"):
             out_dtype = self._binary_op_lt_gt_le_ge(rhs)
         elif op == "mul":
             out_dtype = self._binary_op_mul(rhs)
diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index ecdce9443a1..9672ab3002f 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -1518,11 +1518,7 @@ def fallback(col, fn):
                 else:
                     if col not in df_cols:
                         r_opr = other_cols[col]
-                        l_opr = Series(
-                            column_empty(
-                                len(self), masked=True, dtype=other.dtype
-                            )
-                        )
+                        l_opr = Series(as_column(np.nan, length=len(self)))
                     if col not in other_cols_keys:
                         r_opr = None
                         l_opr = self[col]
@@ -2198,7 +2194,7 @@ def rpow(self, other, axis="columns", level=None, fill_value=None):
         return self._apply_op("rpow", other, fill_value)
 
     def __rpow__(self, other):
-        return self._apply_op("__pow__", other)
+        return self._apply_op("__rpow__", other)
 
     def floordiv(self, other, axis="columns", level=None, fill_value=None):
         """
@@ -6031,7 +6027,6 @@ def isin(self, values):
         falcon      True       True
         dog        False      False
         """
-
         if isinstance(values, dict):
 
             result_df = DataFrame()
@@ -6051,14 +6046,15 @@ def isin(self, values):
             values = values.reindex(self.index)
 
             result = DataFrame()
-
+            # TODO: propagate nulls through isin
+            # https://github.com/rapidsai/cudf/issues/7556
             for col in self._data.names:
                 if isinstance(
                     self[col]._column, cudf.core.column.CategoricalColumn
                 ) and isinstance(
                     values._column, cudf.core.column.CategoricalColumn
                 ):
-                    res = self._data[col] == values._column
+                    res = (self._data[col] == values._column).fillna(False)
                     result[col] = res
                 elif (
                     isinstance(
@@ -6073,7 +6069,9 @@ def isin(self, values):
                 ):
                     result[col] = utils.scalar_broadcast_to(False, len(self))
                 else:
-                    result[col] = self._data[col] == values._column
+                    result[col] = (self._data[col] == values._column).fillna(
+                        False
+                    )
 
             result.index = self.index
             return result
@@ -6083,7 +6081,9 @@ def isin(self, values):
             result = DataFrame()
             for col in self._data.names:
                 if col in values.columns:
-                    result[col] = self._data[col] == values[col]._column
+                    result[col] = (
+                        self._data[col] == values[col]._column
+                    ).fillna(False)
                 else:
                     result[col] = utils.scalar_broadcast_to(False, len(self))
             result.index = self.index
diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index 275d085ef5d..fab5936f94d 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -1566,10 +1566,7 @@ def _apply_boolean_mask(self, boolean_mask):
         rows corresponding to `False` is dropped
         """
         boolean_mask = as_column(boolean_mask)
-        if boolean_mask.has_nulls:
-            raise ValueError(
-                "cannot mask with boolean_mask containing null values"
-            )
+
         result = self.__class__._from_table(
             libcudf.stream_compaction.apply_boolean_mask(
                 self, as_column(boolean_mask)
diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py
index 11e32e2285d..b06fef178f6 100644
--- a/python/cudf/cudf/core/series.py
+++ b/python/cudf/cudf/core/series.py
@@ -1501,9 +1501,7 @@ def _binaryop(
         If ``reflect`` is ``True``, swap the order of the operands.
         """
         if isinstance(other, cudf.DataFrame):
-            # TODO: fn is not the same as arg expected by _apply_op
-            # e.g. for fn = 'and', _apply_op equivalent is '__and__'
-            return other._apply_op(self, fn)
+            return NotImplemented
 
         result_name = utils.get_result_name(self, other)
         if isinstance(other, Series):
@@ -3120,8 +3118,10 @@ def any(self, axis=0, bool_only=None, skipna=True, level=None, **kwargs):
                 "bool_only parameter is not implemented yet"
             )
 
-        if self.empty:
-            return False
+        skipna = False if skipna is None else skipna
+
+        if skipna is False and self.has_nulls:
+            return True
 
         if skipna:
             result_series = self.nans_to_nulls()
diff --git a/python/cudf/cudf/core/tools/datetimes.py b/python/cudf/cudf/core/tools/datetimes.py
index 4e5e4ce1987..535e497e8dc 100644
--- a/python/cudf/cudf/core/tools/datetimes.py
+++ b/python/cudf/cudf/core/tools/datetimes.py
@@ -8,7 +8,9 @@
 from pandas.core.tools.datetimes import _unit_map
 
 import cudf
-from cudf._lib.strings.char_types import is_integer as cpp_is_integer
+from cudf._lib.strings.convert.convert_integers import (
+    is_integer as cpp_is_integer,
+)
 from cudf.core import column
 from cudf.core.index import as_index
 from cudf.utils.dtypes import is_scalar
diff --git a/python/cudf/cudf/tests/test_binops.py b/python/cudf/cudf/tests/test_binops.py
index a0b65743180..18f2d7e474b 100644
--- a/python/cudf/cudf/tests/test_binops.py
+++ b/python/cudf/cudf/tests/test_binops.py
@@ -206,12 +206,45 @@ def test_series_compare(cmpop, obj_class, dtype):
     np.testing.assert_equal(result3.to_array(), cmpop(arr1, arr2))
 
 
+def _series_compare_nulls_typegen():
+    tests = []
+    tests += list(product(DATETIME_TYPES, DATETIME_TYPES))
+    tests += list(product(TIMEDELTA_TYPES, TIMEDELTA_TYPES))
+    tests += list(product(NUMERIC_TYPES, NUMERIC_TYPES))
+    tests += list(product(STRING_TYPES, STRING_TYPES))
+
+    return tests
+
+
+@pytest.mark.parametrize("cmpop", _cmpops)
+@pytest.mark.parametrize("dtypes", _series_compare_nulls_typegen())
+def test_series_compare_nulls(cmpop, dtypes):
+    ltype, rtype = dtypes
+
+    ldata = [1, 2, None, None, 5]
+    rdata = [2, 1, None, 4, None]
+
+    lser = Series(ldata, dtype=ltype)
+    rser = Series(rdata, dtype=rtype)
+
+    lmask = ~lser.isnull()
+    rmask = ~rser.isnull()
+
+    expect_mask = np.logical_and(lmask, rmask)
+    expect = cudf.Series([None] * 5, dtype="bool")
+    expect[expect_mask] = cmpop(lser[expect_mask], rser[expect_mask])
+
+    got = cmpop(lser, rser)
+    utils.assert_eq(expect, got)
+
+
 @pytest.mark.parametrize(
-    "obj", [pd.Series(["a", "b", None, "d", "e", None]), "a"]
+    "obj", [pd.Series(["a", "b", None, "d", "e", None], dtype="string"), "a"]
 )
 @pytest.mark.parametrize("cmpop", _cmpops)
 @pytest.mark.parametrize(
-    "cmp_obj", [pd.Series(["b", "a", None, "d", "f", None]), "a"]
+    "cmp_obj",
+    [pd.Series(["b", "a", None, "d", "f", None], dtype="string"), "a"],
 )
 def test_string_series_compare(obj, cmpop, cmp_obj):
 
@@ -221,10 +254,12 @@ def test_string_series_compare(obj, cmpop, cmp_obj):
     g_cmp_obj = cmp_obj
     if isinstance(g_cmp_obj, pd.Series):
         g_cmp_obj = Series.from_pandas(g_cmp_obj)
-
     got = cmpop(g_obj, g_cmp_obj)
     expected = cmpop(obj, cmp_obj)
 
+    if isinstance(expected, pd.Series):
+        expected = cudf.from_pandas(expected)
+
     utils.assert_eq(expected, got)
 
 
@@ -694,10 +729,12 @@ def test_operator_func_series_and_scalar(
 def test_operator_func_between_series_logical(
     dtype, func, scalar_a, scalar_b, fill_value
 ):
-    gdf_series_a = Series([scalar_a]).astype(dtype)
-    gdf_series_b = Series([scalar_b]).astype(dtype)
-    pdf_series_a = gdf_series_a.to_pandas()
-    pdf_series_b = gdf_series_b.to_pandas()
+
+    gdf_series_a = Series([scalar_a], nan_as_null=False).astype(dtype)
+    gdf_series_b = Series([scalar_b], nan_as_null=False).astype(dtype)
+
+    pdf_series_a = gdf_series_a.to_pandas(nullable=True)
+    pdf_series_b = gdf_series_b.to_pandas(nullable=True)
 
     gdf_series_result = getattr(gdf_series_a, func)(
         gdf_series_b, fill_value=fill_value
@@ -705,16 +742,22 @@ def test_operator_func_between_series_logical(
     pdf_series_result = getattr(pdf_series_a, func)(
         pdf_series_b, fill_value=fill_value
     )
-
-    if scalar_a in [None, np.nan] and scalar_b in [None, np.nan]:
-        # cudf binary operations will return `None` when both left- and right-
-        # side values are `None`. It will return `np.nan` when either side is
-        # `np.nan`. As a consequence, when we convert our gdf => pdf during
-        # assert_eq, we get a pdf with dtype='object' (all inputs are none).
-        # to account for this, we use fillna.
-        gdf_series_result.fillna(func == "ne", inplace=True)
-
-    utils.assert_eq(pdf_series_result, gdf_series_result)
+    expect = pdf_series_result
+    got = gdf_series_result.to_pandas(nullable=True)
+
+    # If fill_value is np.nan, things break down a bit,
+    # because setting a NaN into a pandas nullable float
+    # array still gets transformed to <NA>. As such,
+    # pd_series_with_nulls.fillna(np.nan) has no effect.
+    if (
+        (pdf_series_a.isnull().sum() != pdf_series_b.isnull().sum())
+        and np.isscalar(fill_value)
+        and np.isnan(fill_value)
+    ):
+        with pytest.raises(AssertionError):
+            utils.assert_eq(expect, got)
+        return
+    utils.assert_eq(expect, got)
 
 
 @pytest.mark.parametrize("dtype", ["float32", "float64"])
@@ -729,8 +772,7 @@ def test_operator_func_series_and_scalar_logical(
     gdf_series = utils.gen_rand_series(
         dtype, 1000, has_nulls=has_nulls, stride=10000
     )
-    pdf_series = gdf_series.to_pandas()
-
+    pdf_series = gdf_series.to_pandas(nullable=True)
     gdf_series_result = getattr(gdf_series, func)(
         cudf.Scalar(scalar) if use_cudf_scalar else scalar,
         fill_value=fill_value,
@@ -739,7 +781,10 @@ def test_operator_func_series_and_scalar_logical(
         scalar, fill_value=fill_value
     )
 
-    utils.assert_eq(pdf_series_result, gdf_series_result)
+    expect = pdf_series_result
+    got = gdf_series_result.to_pandas(nullable=True)
+
+    utils.assert_eq(expect, got)
 
 
 @pytest.mark.parametrize("func", _operators_arithmetic)
@@ -1738,10 +1783,61 @@ def test_equality_ops_index_mismatch(fn):
         index=["aa", "b", "c", "d", "e", "f", "y", "z"],
     )
 
-    pa = a.to_pandas()
-    pb = b.to_pandas()
-
+    pa = a.to_pandas(nullable=True)
+    pb = b.to_pandas(nullable=True)
     expected = getattr(pa, fn)(pb)
-    actual = getattr(a, fn)(b)
+    actual = getattr(a, fn)(b).to_pandas(nullable=True)
 
     utils.assert_eq(expected, actual)
+
+
+def generate_test_null_equals_columnops_data():
+    # Generate tuples of:
+    # (left_data, right_data, compare_bool, null_case)
+    # where compare_bool indicates whether the columns
+    # should compare as equal under null-equals semantics
+
+    def set_null_cases(column_l, column_r, case):
+        if case == "neither":
+            return column_l, column_r
+        elif case == "left":
+            column_l[1] = None
+        elif case == "right":
+            column_r[1] = None
+        elif case == "both":
+            column_l[1] = None
+            column_r[1] = None
+        else:
+            raise ValueError("Unknown null case")
+        return column_l, column_r
+
+    null_cases = ["neither", "left", "right", "both"]
+    data = [1, 2, 3]
+
+    results = []
+    # TODO: Numeric types can be cross compared as null equal
+    for dtype in (
+        list(NUMERIC_TYPES)
+        + list(DATETIME_TYPES)
+        + list(TIMEDELTA_TYPES)
+        + list(STRING_TYPES)
+        + ["category"]
+    ):
+        for case in null_cases:
+            left = cudf.Series(data, dtype=dtype)
+            right = cudf.Series(data, dtype=dtype)
+            if case in {"left", "right"}:
+                answer = False
+            else:
+                answer = True
+            left, right = set_null_cases(left, right, case)
+            results.append((left._column, right._column, answer, case))
+
+    return results
+
+
+@pytest.mark.parametrize(
+    "lcol,rcol,ans,case", generate_test_null_equals_columnops_data()
+)
+def test_null_equals_columnops(lcol, rcol, ans, case):
+    assert lcol._null_equals(rcol).all() == ans
diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py
index ffd66e18314..5f4d571e8c5 100644
--- a/python/cudf/cudf/tests/test_dataframe.py
+++ b/python/cudf/cudf/tests/test_dataframe.py
@@ -4996,13 +4996,13 @@ def test_cov_nans():
 @pytest.mark.parametrize(
     "gsr",
     [
-        cudf.Series([1, 2, 3]),
-        cudf.Series([1, 2, 3], index=["a", "b", "c"]),
-        cudf.Series([1, 2, 3], index=["a", "b", "d"]),
-        cudf.Series([1, 2], index=["a", "b"]),
-        cudf.Series([1, 2, 3], index=cudf.core.index.RangeIndex(0, 3)),
+        cudf.Series([4, 2, 3]),
+        cudf.Series([4, 2, 3], index=["a", "b", "c"]),
+        cudf.Series([4, 2, 3], index=["a", "b", "d"]),
+        cudf.Series([4, 2], index=["a", "b"]),
+        cudf.Series([4, 2, 3], index=cudf.core.index.RangeIndex(0, 3)),
         pytest.param(
-            cudf.Series([1, 2, 3, 4, 5], index=["a", "b", "d", "0", "12"]),
+            cudf.Series([4, 2, 3, 4, 5], index=["a", "b", "d", "0", "12"]),
             marks=pytest.mark.xfail,
         ),
     ],
@@ -5026,21 +5026,23 @@ def test_cov_nans():
     ],
 )
 def test_df_sr_binop(gsr, colnames, op):
-    data = [[0, 2, 5], [3, None, 5], [6, 7, np.nan]]
+    data = [[3.0, 2.0, 5.0], [3.0, None, 5.0], [6.0, 7.0, np.nan]]
     data = dict(zip(colnames, data))
 
+    gsr = gsr.astype("float64")
+
     gdf = cudf.DataFrame(data)
-    pdf = pd.DataFrame.from_dict(data)
+    pdf = gdf.to_pandas(nullable=True)
 
-    psr = gsr.to_pandas()
+    psr = gsr.to_pandas(nullable=True)
 
     expect = op(pdf, psr)
-    got = op(gdf, gsr)
-    assert_eq(expect.astype(float), got.astype(float))
+    got = op(gdf, gsr).to_pandas(nullable=True)
+    assert_eq(expect, got, check_dtype=False)
 
     expect = op(psr, pdf)
-    got = op(psr, pdf)
-    assert_eq(expect.astype(float), got.astype(float))
+    got = op(gsr, gdf).to_pandas(nullable=True)
+    assert_eq(expect, got, check_dtype=False)
 
 
 @pytest.mark.parametrize(
@@ -5052,12 +5054,14 @@ def test_df_sr_binop(gsr, colnames, op):
         operator.truediv,
         operator.mod,
         operator.pow,
-        operator.eq,
-        operator.lt,
-        operator.le,
-        operator.gt,
-        operator.ge,
-        operator.ne,
+        # comparison ops will temporarily XFAIL
+        # see PR https://github.com/rapidsai/cudf/pull/7491
+        pytest.param(operator.eq, marks=pytest.mark.xfail()),
+        pytest.param(operator.lt, marks=pytest.mark.xfail()),
+        pytest.param(operator.le, marks=pytest.mark.xfail()),
+        pytest.param(operator.gt, marks=pytest.mark.xfail()),
+        pytest.param(operator.ge, marks=pytest.mark.xfail()),
+        pytest.param(operator.ne, marks=pytest.mark.xfail()),
     ],
 )
 @pytest.mark.parametrize(
diff --git a/python/cudf/cudf/tests/test_indexing.py b/python/cudf/cudf/tests/test_indexing.py
index 558700f1f89..cec2623027f 100644
--- a/python/cudf/cudf/tests/test_indexing.py
+++ b/python/cudf/cudf/tests/test_indexing.py
@@ -755,17 +755,6 @@ def do_slice(x):
     assert_eq(expect, got, check_dtype=False)
 
 
-def test_dataframe_boolean_mask_with_None():
-    pdf = pd.DataFrame({"a": [0, 1, 2, 3], "b": [0.1, 0.2, None, 0.3]})
-    gdf = cudf.DataFrame.from_pandas(pdf)
-    pdf_masked = pdf[[True, False, True, False]]
-    gdf_masked = gdf[[True, False, True, False]]
-    assert_eq(pdf_masked, gdf_masked)
-
-    with pytest.raises(ValueError):
-        gdf[cudf.Series([True, False, None, False])]
-
-
 @pytest.mark.parametrize("dtype", [int, float, str])
 def test_empty_boolean_mask(dtype):
     gdf = cudf.datasets.randomdata(nrows=0, dtypes={"a": dtype})
diff --git a/python/cudf/cudf/tests/test_list.py b/python/cudf/cudf/tests/test_list.py
index 195d8749ec6..33812cfa7a7 100644
--- a/python/cudf/cudf/tests/test_list.py
+++ b/python/cudf/cudf/tests/test_list.py
@@ -112,3 +112,50 @@ def test_len(data):
     got = gsr.list.len()
 
     assert_eq(expect, got, check_dtype=False)
+
+
+@pytest.mark.parametrize(
+    ("data", "idx"),
+    [
+        ([[1, 2, 3], [3, 4, 5], [4, 5, 6]], [[0, 1], [2], [1, 2]]),
+        ([[1, 2, 3], [3, 4, 5], [4, 5, 6]], [[1, 2, 0], [1, 0, 2], [0, 1, 2]]),
+        ([[1, 2, 3], []], [[0, 1], []]),
+        ([[1, 2, 3], [None]], [[0, 1], []]),
+        ([[1, None, 3], None], [[0, 1], []]),
+    ],
+)
+def test_take(data, idx):
+    ps = pd.Series(data)
+    gs = cudf.from_pandas(ps)
+
+    expected = pd.Series(zip(ps, idx)).map(
+        lambda x: [x[0][i] for i in x[1]] if x[0] is not None else None
+    )
+    got = gs.list.take(idx)
+    assert_eq(expected, got)
+
+
+@pytest.mark.parametrize(
+    ("invalid", "exception"),
+    [
+        ([[0]], pytest.raises(ValueError, match="different size")),
+        ([1, 2, 3, 4], pytest.raises(ValueError, match="should be list type")),
+        (
+            [["a", "b"], ["c"]],
+            pytest.raises(
+                TypeError, match="should be column of values of index types"
+            ),
+        ),
+        (
+            [[[1], [0]], [[0]]],
+            pytest.raises(
+                TypeError, match="should be column of values of index types"
+            ),
+        ),
+        ([[0, 1], None], pytest.raises(ValueError, match="contains null")),
+    ],
+)
+def test_take_invalid(invalid, exception):
+    gs = cudf.Series([[0, 1], [2, 3]])
+    with exception:
+        gs.list.take(invalid)
diff --git a/python/cudf/cudf/tests/test_orc.py b/python/cudf/cudf/tests/test_orc.py
index ed91e909f25..ca8aa00f80c 100644
--- a/python/cudf/cudf/tests/test_orc.py
+++ b/python/cudf/cudf/tests/test_orc.py
@@ -724,3 +724,17 @@ def test_orc_bool_encode_fail():
     # Also validate data
     pdf = pa.orc.ORCFile(buffer).read().to_pandas()
     assert_eq(okay_df, pdf)
+
+
+def test_nanoseconds_overflow():
+    buffer = BytesIO()
+    # Use nanosecond values that take more than 32 bits to encode
+    s = cudf.Series([710424008, -1338482640], dtype="datetime64[ns]")
+    expected = cudf.DataFrame({"s": s})
+    expected.to_orc(buffer)
+
+    cudf_got = cudf.read_orc(buffer)
+    assert_eq(expected, cudf_got)
+
+    pyarrow_got = pa.orc.ORCFile(buffer).read()
+    assert_eq(expected.to_pandas(), pyarrow_got.to_pandas())
diff --git a/python/cudf/cudf/tests/test_setitem.py b/python/cudf/cudf/tests/test_setitem.py
index 4d2e2a4b33b..1005efec3ee 100644
--- a/python/cudf/cudf/tests/test_setitem.py
+++ b/python/cudf/cudf/tests/test_setitem.py
@@ -143,15 +143,14 @@ def test_setitem_dataframe_series_inplace(df):
 )
 def test_series_set_equal_length_object_by_mask(replace_data):
 
-    psr = pd.Series([1, 2, 3, 4, 5])
+    psr = pd.Series([1, 2, 3, 4, 5], dtype="Int64")
     gsr = cudf.from_pandas(psr)
 
     # Lengths match in trivial case
-    pd_bool_col = pd.Series([True] * len(psr))
+    pd_bool_col = pd.Series([True] * len(psr), dtype="boolean")
     gd_bool_col = cudf.from_pandas(pd_bool_col)
-
     psr[pd_bool_col] = (
-        replace_data.to_pandas()
+        replace_data.to_pandas(nullable=True)
         if hasattr(replace_data, "to_pandas")
         else replace_data
     )