Commit

Merge remote-tracking branch 'upstream/branch-0.19' into mwilson/explode_outer
hyperbolic2346 committed Mar 16, 2021
2 parents 03701e7 + c1c60ba commit 44066f7
Showing 90 changed files with 4,484 additions and 971 deletions.
2 changes: 1 addition & 1 deletion CHANGELOG.md
@@ -1,6 +1,6 @@
# cuDF 0.19.0 (Date TBD)

Please see https://github.com/rapidsai/cudf/releases/tag/branch-0.19-latest for the latest changes to this development branch.
Please see https://github.com/rapidsai/cudf/releases/tag/v0.19.0a for the latest changes to this development branch.

# cuDF 0.18.0 (24 Feb 2021)

13 changes: 7 additions & 6 deletions ci/cpu/upload.sh
@@ -28,6 +28,7 @@ fi
################################################################################

gpuci_logger "Get conda file output locations"

export LIBCUDF_FILE=`conda build --no-build-id --croot ${WORKSPACE}/.conda-bld conda/recipes/libcudf --output`
export LIBCUDF_KAFKA_FILE=`conda build --no-build-id --croot ${WORKSPACE}/.conda-bld conda/recipes/libcudf_kafka --output`
export CUDF_FILE=`conda build --croot ${CONDA_BLD_DIR} conda/recipes/cudf --python=$PYTHON --output`
@@ -44,36 +45,36 @@ if [[ "$BUILD_LIBCUDF" == "1" && "$UPLOAD_LIBCUDF" == "1" ]]; then
test -e ${LIBCUDF_FILE}
echo "Upload libcudf"
echo ${LIBCUDF_FILE}
gpuci_retry anaconda -t ${MY_UPLOAD_KEY} upload -u ${CONDA_USERNAME:-rapidsai} ${LABEL_OPTION} --skip-existing ${LIBCUDF_FILE}
gpuci_retry anaconda -t ${MY_UPLOAD_KEY} upload -u ${CONDA_USERNAME:-rapidsai} ${LABEL_OPTION} --skip-existing ${LIBCUDF_FILE} --no-progress
fi

if [[ "$BUILD_CUDF" == "1" && "$UPLOAD_CUDF" == "1" ]]; then
test -e ${CUDF_FILE}
echo "Upload cudf"
echo ${CUDF_FILE}
gpuci_retry anaconda -t ${MY_UPLOAD_KEY} upload -u ${CONDA_USERNAME:-rapidsai} ${LABEL_OPTION} --skip-existing ${CUDF_FILE}
gpuci_retry anaconda -t ${MY_UPLOAD_KEY} upload -u ${CONDA_USERNAME:-rapidsai} ${LABEL_OPTION} --skip-existing ${CUDF_FILE} --no-progress

test -e ${DASK_CUDF_FILE}
echo "Upload dask-cudf"
echo ${DASK_CUDF_FILE}
gpuci_retry anaconda -t ${MY_UPLOAD_KEY} upload -u ${CONDA_USERNAME:-rapidsai} ${LABEL_OPTION} --skip-existing ${DASK_CUDF_FILE}
gpuci_retry anaconda -t ${MY_UPLOAD_KEY} upload -u ${CONDA_USERNAME:-rapidsai} ${LABEL_OPTION} --skip-existing ${DASK_CUDF_FILE} --no-progress

test -e ${CUSTREAMZ_FILE}
echo "Upload custreamz"
echo ${CUSTREAMZ_FILE}
gpuci_retry anaconda -t ${MY_UPLOAD_KEY} upload -u ${CONDA_USERNAME:-rapidsai} ${LABEL_OPTION} --skip-existing ${CUSTREAMZ_FILE}
gpuci_retry anaconda -t ${MY_UPLOAD_KEY} upload -u ${CONDA_USERNAME:-rapidsai} ${LABEL_OPTION} --skip-existing ${CUSTREAMZ_FILE} --no-progress
fi

if [[ "$BUILD_LIBCUDF" == "1" && "$UPLOAD_LIBCUDF_KAFKA" == "1" ]]; then
test -e ${LIBCUDF_KAFKA_FILE}
echo "Upload libcudf_kafka"
echo ${LIBCUDF_KAFKA_FILE}
gpuci_retry anaconda -t ${MY_UPLOAD_KEY} upload -u ${CONDA_USERNAME:-rapidsai} ${LABEL_OPTION} --skip-existing ${LIBCUDF_KAFKA_FILE}
gpuci_retry anaconda -t ${MY_UPLOAD_KEY} upload -u ${CONDA_USERNAME:-rapidsai} ${LABEL_OPTION} --skip-existing ${LIBCUDF_KAFKA_FILE} --no-progress
fi

if [[ "$BUILD_CUDF" == "1" && "$UPLOAD_CUDF_KAFKA" == "1" ]]; then
test -e ${CUDF_KAFKA_FILE}
echo "Upload cudf_kafka"
echo ${CUDF_KAFKA_FILE}
gpuci_retry anaconda -t ${MY_UPLOAD_KEY} upload -u ${CONDA_USERNAME:-rapidsai} ${LABEL_OPTION} --skip-existing ${CUDF_KAFKA_FILE}
gpuci_retry anaconda -t ${MY_UPLOAD_KEY} upload -u ${CONDA_USERNAME:-rapidsai} ${LABEL_OPTION} --skip-existing ${CUDF_KAFKA_FILE} --no-progress
fi
4 changes: 3 additions & 1 deletion conda/recipes/libcudf/meta.yaml
@@ -132,8 +132,10 @@ test:
- test -f $PREFIX/include/cudf/join.hpp
- test -f $PREFIX/include/cudf/lists/detail/concatenate.hpp
- test -f $PREFIX/include/cudf/lists/detail/copying.hpp
- test -f $PREFIX/include/cudf/lists/detail/sorting.hpp
- test -f $PREFIX/include/cudf/lists/count_elements.hpp
- test -f $PREFIX/include/cudf/lists/explode.hpp
- test -f $PREFIX/include/cudf/lists/drop_list_duplicates.hpp
- test -f $PREFIX/include/cudf/lists/extract.hpp
- test -f $PREFIX/include/cudf/lists/contains.hpp
- test -f $PREFIX/include/cudf/lists/gather.hpp
@@ -168,10 +170,10 @@ test:
- test -f $PREFIX/include/cudf/strings/convert/convert_integers.hpp
- test -f $PREFIX/include/cudf/strings/convert/convert_ipv4.hpp
- test -f $PREFIX/include/cudf/strings/convert/convert_urls.hpp
- test -f $PREFIX/include/cudf/strings/copying.hpp
- test -f $PREFIX/include/cudf/strings/detail/combine.hpp
- test -f $PREFIX/include/cudf/strings/detail/concatenate.hpp
- test -f $PREFIX/include/cudf/strings/detail/converters.hpp
- test -f $PREFIX/include/cudf/strings/detail/copying.hpp
- test -f $PREFIX/include/cudf/strings/detail/fill.hpp
- test -f $PREFIX/include/cudf/strings/detail/replace.hpp
- test -f $PREFIX/include/cudf/strings/detail/utilities.hpp
4 changes: 2 additions & 2 deletions cpp/CMakeLists.txt
@@ -22,10 +22,9 @@ cmake_minimum_required(VERSION 3.18 FATAL_ERROR)

# This needs to be run before enabling the CUDA language due to the default initialization behavior
# of `CMAKE_CUDA_ARCHITECTURES`, https://gitlab.kitware.com/cmake/cmake/-/issues/21302
if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES OR CMAKE_CUDA_ARCHITECTURES STREQUAL "ALL")
set(CUDF_BUILD_FOR_ALL_ARCHS TRUE)
elseif(CMAKE_CUDA_ARCHITECTURES STREQUAL "")
unset(CMAKE_CUDA_ARCHITECTURES CACHE)
set(CUDF_BUILD_FOR_DETECTED_ARCHS TRUE)
endif()

@@ -259,6 +258,7 @@ add_library(cudf
src/lists/count_elements.cu
src/lists/explode.cu
src/lists/extract.cu
src/lists/drop_list_duplicates.cu
src/lists/lists_column_factories.cu
src/lists/lists_column_view.cu
src/lists/segmented_sort.cu
22 changes: 6 additions & 16 deletions cpp/cmake/Modules/ConfigureCUDA.cmake
@@ -17,26 +17,16 @@
# Find the CUDAToolkit
find_package(CUDAToolkit REQUIRED)

# Must come after find_package(CUDAToolkit) because we symlink
# ccache as a compiler front-end for nvcc in gpuCI CPU builds.
enable_language(CUDA)

if(CMAKE_CUDA_COMPILER_VERSION)
# Compute the version. from CMAKE_CUDA_COMPILER_VERSION
string(REGEX REPLACE "([0-9]+)\\.([0-9]+).*" "\\1" CUDA_VERSION_MAJOR ${CMAKE_CUDA_COMPILER_VERSION})
string(REGEX REPLACE "([0-9]+)\\.([0-9]+).*" "\\2" CUDA_VERSION_MINOR ${CMAKE_CUDA_COMPILER_VERSION})
set(CUDA_VERSION "${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR}")
endif()

message(VERBOSE "CUDF: CUDA_VERSION_MAJOR: ${CUDA_VERSION_MAJOR}")
message(VERBOSE "CUDF: CUDA_VERSION_MINOR: ${CUDA_VERSION_MINOR}")
message(STATUS "CUDF: CUDA_VERSION: ${CUDA_VERSION}")

# Auto-detect available GPU compute architectures

include(${CUDF_SOURCE_DIR}/cmake/Modules/SetGPUArchs.cmake)
message(STATUS "CUDF: Building CUDF for GPU architectures: ${CMAKE_CUDA_ARCHITECTURES}")

# Must come after find_package(CUDAToolkit) because we symlink
# ccache as a compiler front-end for nvcc in gpuCI CPU builds.
# Must also come after we detect and potentially rewrite
# CMAKE_CUDA_ARCHITECTURES
enable_language(CUDA)

if(CMAKE_COMPILER_IS_GNUCXX)
list(APPEND CUDF_CXX_FLAGS -Wall -Werror -Wno-unknown-pragmas -Wno-error=deprecated-declarations)
if(CUDF_BUILD_TESTS OR CUDF_BUILD_BENCHMARKS)
37 changes: 22 additions & 15 deletions cpp/cmake/Modules/EvalGPUArchs.cmake
@@ -14,12 +14,21 @@
# limitations under the License.
#=============================================================================

# Unset this first in case it's set to <empty_string>
set(CMAKE_CUDA_ARCHITECTURES OFF)

# Enable CUDA so we can invoke nvcc
enable_language(CUDA)

# Function uses the CUDA runtime API to query the compute capability of the device, so if a user
# doesn't pass any architecture options to CMake we only build the current architecture
function(evaluate_gpu_archs gpu_archs)
set(eval_file ${PROJECT_BINARY_DIR}/eval_gpu_archs.cu)
set(eval_exe ${PROJECT_BINARY_DIR}/eval_gpu_archs)
set(error_file ${PROJECT_BINARY_DIR}/eval_gpu_archs.stderr.log)
file(WRITE ${eval_file}
[=[
file(
WRITE ${eval_file}
"
#include <cstdio>
#include <set>
#include <string>
@@ -32,32 +41,30 @@ int main(int argc, char** argv) {
char buff[32];
cudaDeviceProp prop;
if(cudaGetDeviceProperties(&prop, dev) != cudaSuccess) continue;
sprintf(buff, "%d%d", prop.major, prop.minor);
sprintf(buff, \"%d%d\", prop.major, prop.minor);
archs.insert(buff);
}
}
if(archs.empty()) {
printf("ALL");
printf(\"${SUPPORTED_CUDA_ARCHITECTURES}\");
} else {
bool first = true;
for(const auto& arch : archs) {
printf(first? "%s" : ";%s", arch.c_str());
printf(first? \"%s\" : \";%s\", arch.c_str());
first = false;
}
}
printf("\n");
printf(\"\\n\");
return 0;
}
]=])
")
execute_process(
COMMAND ${CMAKE_CUDA_COMPILER}
-std=c++11
-o ${eval_exe}
--run
${eval_file}
COMMAND ${CMAKE_CUDA_COMPILER} -std=c++11 -o ${eval_exe} --run ${eval_file}
OUTPUT_VARIABLE __gpu_archs
OUTPUT_STRIP_TRAILING_WHITESPACE
ERROR_FILE ${error_file})
message(VERBOSE "CUDF: Auto detection of gpu-archs: ${__gpu_archs}")
set(${gpu_archs} ${__gpu_archs} PARENT_SCOPE)
endfunction()
message(STATUS "CUDF: Auto detection of gpu-archs: ${__gpu_archs}")
set(${gpu_archs}
${__gpu_archs}
PARENT_SCOPE)
endfunction(evaluate_gpu_archs)
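For readability, here is a minimal standalone sketch of the detection program that evaluate_gpu_archs writes and runs; the escaped-quote string above is what actually lands in eval_gpu_archs.cu. The fallback value "70;75;80" is only a placeholder for the ${SUPPORTED_CUDA_ARCHITECTURES} list that CMake substitutes at configure time.

// Unescaped sketch of the generated eval_gpu_archs.cu: query every visible
// device via the CUDA runtime and print a ';'-separated list of unique
// "majorminor" architectures (e.g. "70;80"). Compile and run with nvcc.
#include <cstdio>
#include <set>
#include <string>

#include <cuda_runtime.h>

int main() {
  std::set<std::string> archs;
  int num_devices = 0;
  if (cudaGetDeviceCount(&num_devices) == cudaSuccess) {
    for (int dev = 0; dev < num_devices; ++dev) {
      cudaDeviceProp prop{};
      if (cudaGetDeviceProperties(&prop, dev) != cudaSuccess) continue;
      archs.insert(std::to_string(prop.major) + std::to_string(prop.minor));
    }
  }
  if (archs.empty()) {
    // The generated file substitutes ${SUPPORTED_CUDA_ARCHITECTURES} here;
    // "70;75;80" is only a stand-in for this sketch.
    std::printf("70;75;80");
  } else {
    bool first = true;
    for (auto const& arch : archs) {
      std::printf(first ? "%s" : ";%s", arch.c_str());
      first = false;
    }
  }
  std::printf("\n");
  return 0;
}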
46 changes: 26 additions & 20 deletions cpp/cmake/Modules/SetGPUArchs.cmake
@@ -25,35 +25,41 @@ else()
list(REMOVE_ITEM SUPPORTED_CUDA_ARCHITECTURES "62" "72")
endif()

if(CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 11)
# CMake < 3.20 has a bug in FindCUDAToolkit where it won't properly detect the CUDAToolkit version
# when find_package(CUDAToolkit) occurs before enable_language(CUDA)
if(NOT DEFINED CUDAToolkit_VERSION AND CMAKE_CUDA_COMPILER)
execute_process(COMMAND ${CMAKE_CUDA_COMPILER} "--version" OUTPUT_VARIABLE NVCC_OUT)
if(NVCC_OUT MATCHES [=[ V([0-9]+)\.([0-9]+)\.([0-9]+)]=])
set(CUDAToolkit_VERSION_MAJOR "${CMAKE_MATCH_1}")
set(CUDAToolkit_VERSION_MINOR "${CMAKE_MATCH_2}")
set(CUDAToolkit_VERSION_PATCH "${CMAKE_MATCH_3}")
set(CUDAToolkit_VERSION "${CMAKE_MATCH_1}.${CMAKE_MATCH_2}.${CMAKE_MATCH_3}")
endif()
unset(NVCC_OUT)
endif()

if(CUDAToolkit_VERSION_MAJOR LESS 11)
list(REMOVE_ITEM SUPPORTED_CUDA_ARCHITECTURES "80")
endif()
if(CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 10)
if(CUDAToolkit_VERSION_MAJOR LESS 10)
list(REMOVE_ITEM SUPPORTED_CUDA_ARCHITECTURES "75")
endif()
if(CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 9)
if(CUDAToolkit_VERSION_MAJOR LESS 9)
list(REMOVE_ITEM SUPPORTED_CUDA_ARCHITECTURES "70")
endif()

if(CUDF_BUILD_FOR_DETECTED_ARCHS)
include(${CUDF_SOURCE_DIR}/cmake/Modules/EvalGPUArchs.cmake)
evaluate_gpu_archs(CMAKE_CUDA_ARCHITECTURES)
if(CMAKE_CUDA_ARCHITECTURES STREQUAL "ALL")
unset(CMAKE_CUDA_ARCHITECTURES CACHE)
set(CUDF_BUILD_FOR_ALL_ARCHS TRUE)
else()
set(CUDF_BUILD_FOR_ALL_ARCHS FALSE)
list(TRANSFORM CMAKE_CUDA_ARCHITECTURES APPEND "-real")
endif()
endif()

if(CUDF_BUILD_FOR_ALL_ARCHS)
if(${PROJECT_NAME}_BUILD_FOR_ALL_ARCHS)
set(CMAKE_CUDA_ARCHITECTURES ${SUPPORTED_CUDA_ARCHITECTURES})
# CMake architecture list entry of "80" means to build compute and sm.
# What we want is for the newest arch only to build that way
# while the rest built only for sm.
list(SORT CMAKE_CUDA_ARCHITECTURES ORDER ASCENDING)

# CMake architecture list entry of "80" means to build compute and sm. What we want is for the
# newest arch only to build that way while the rest built only for sm.
list(POP_BACK CMAKE_CUDA_ARCHITECTURES latest_arch)
list(TRANSFORM CMAKE_CUDA_ARCHITECTURES APPEND "-real")
list(APPEND CMAKE_CUDA_ARCHITECTURES ${latest_arch})

elseif(${PROJECT_NAME}_BUILD_FOR_DETECTED_ARCHS)
include(${PROJECT_SOURCE_DIR}/cmake/Modules/EvalGPUArchs.cmake)
evaluate_gpu_archs(CMAKE_CUDA_ARCHITECTURES)

list(TRANSFORM CMAKE_CUDA_ARCHITECTURES APPEND "-real")
endif()
2 changes: 1 addition & 1 deletion cpp/cmake/thirdparty/CUDF_GetDLPack.cmake
@@ -16,7 +16,7 @@

function(find_and_configure_dlpack VERSION)
if(DLPACK_INCLUDE)
set(DLPACK_INCLUDE_DIR "${DLPACK_INCLUDE_DIR}" PARENT_SCOPE)
set(DLPACK_INCLUDE_DIR "${DLPACK_INCLUDE}" PARENT_SCOPE)
return()
endif()
find_path(DLPACK_INCLUDE_DIR "dlpack"
8 changes: 7 additions & 1 deletion cpp/cmake/thirdparty/CUDF_GetRMM.cmake
@@ -43,12 +43,18 @@ function(find_and_configure_rmm VERSION)
OPTIONS "BUILD_TESTS OFF"
"BUILD_BENCHMARKS OFF"
"CUDA_STATIC_RUNTIME ${CUDA_STATIC_RUNTIME}"
"CMAKE_CUDA_ARCHITECTURES ${CMAKE_CUDA_ARCHITECTURES}"
"DISABLE_DEPRECATION_WARNING ${DISABLE_DEPRECATION_WARNING}"
)
cudf_restore_if_enabled(BUILD_TESTS)
cudf_restore_if_enabled(BUILD_BENCHMARKS)

#Make sure consumers of cudf can also see rmm::rmm
if(TARGET rmm::rmm)
get_target_property(rmm_is_imported rmm::rmm IMPORTED)
if(rmm_is_imported)
set_target_properties(rmm::rmm PROPERTIES IMPORTED_GLOBAL TRUE)
endif()
endif()
if(NOT rmm_BINARY_DIR IN_LIST CMAKE_PREFIX_PATH)
list(APPEND CMAKE_PREFIX_PATH "${rmm_BINARY_DIR}")
set(CMAKE_PREFIX_PATH ${CMAKE_PREFIX_PATH} PARENT_SCOPE)
24 changes: 24 additions & 0 deletions cpp/include/cudf/binaryop.hpp
@@ -178,5 +178,29 @@ std::unique_ptr<column> binary_operation(
data_type output_type,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @brief Computes the `scale` for a `fixed_point` number based on given binary operator `op`
*
* @param op The binary_operator used for two `fixed_point` numbers
* @param left_scale Scale of left `fixed_point` number
* @param right_scale Scale of right `fixed_point` number
* @return The resulting `scale` of the computed `fixed_point` number
*/
int32_t binary_operation_fixed_point_scale(binary_operator op,
int32_t left_scale,
int32_t right_scale);

/**
* @brief Computes the `data_type` for a `fixed_point` number based on given binary operator `op`
*
* @param op The binary_operator used for two `fixed_point` numbers
* @param lhs `cudf::data_type` of left `fixed_point` number
* @param rhs `cudf::data_type` of right `fixed_point` number
* @return The resulting `cudf::data_type` of the computed `fixed_point` number
*/
cudf::data_type binary_operation_fixed_point_output_type(binary_operator op,
cudf::data_type const& lhs,
cudf::data_type const& rhs);

/** @} */ // end of group
} // namespace cudf
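A minimal usage sketch (not part of this diff) of the two new helpers, assuming the DECIMAL64 fixed-point type and the MUL operator from libcudf; it shows how a caller can derive the result scale and the full output data_type before invoking binary_operation.

// Hedged sketch: pre-compute the fixed-point result type for a multiply.
#include <cudf/binaryop.hpp>
#include <cudf/types.hpp>

#include <cstdint>

cudf::data_type decimal_mul_output_type()
{
  auto const lhs = cudf::data_type{cudf::type_id::DECIMAL64, -2};  // scale -2
  auto const rhs = cudf::data_type{cudf::type_id::DECIMAL64, -3};  // scale -3

  // Scale alone, e.g. to validate a user-provided output type.
  int32_t const scale =
    cudf::binary_operation_fixed_point_scale(cudf::binary_operator::MUL, -2, -3);
  (void)scale;

  // Full output type, suitable to pass as `output_type` to binary_operation.
  return cudf::binary_operation_fixed_point_output_type(
    cudf::binary_operator::MUL, lhs, rhs);
}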
6 changes: 3 additions & 3 deletions cpp/include/cudf/detail/groupby/sort_helper.hpp
@@ -22,7 +22,7 @@
#include <cudf/types.hpp>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_vector.hpp>
#include <rmm/device_uvector.hpp>

namespace cudf {
namespace groupby {
@@ -40,8 +40,8 @@ namespace sort {
* value column
*/
struct sort_groupby_helper {
using index_vector = rmm::device_vector<size_type>;
using bitmask_vector = rmm::device_vector<bitmask_type>;
using index_vector = rmm::device_uvector<size_type>;
using bitmask_vector = rmm::device_uvector<bitmask_type>;
using column_ptr = std::unique_ptr<column>;
using index_vector_ptr = std::unique_ptr<index_vector>;
using bitmask_vector_ptr = std::unique_ptr<bitmask_vector>;
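The switch from rmm::device_vector to rmm::device_uvector changes how these index and bitmask buffers are built: device_uvector is stream-ordered and leaves its storage uninitialized, so a size and a CUDA stream must be passed explicitly. A minimal sketch of the new construction pattern (illustrative only, not code from this commit):

// Hedged sketch: allocating an index_vector the device_uvector way.
#include <cudf/types.hpp>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_uvector.hpp>

rmm::device_uvector<cudf::size_type> make_index_vector(cudf::size_type num_rows,
                                                       rmm::cuda_stream_view stream)
{
  // Uninitialized, stream-ordered allocation of num_rows elements; contrast
  // with rmm::device_vector, which value-initializes its elements and runs on
  // the default stream.
  return rmm::device_uvector<cudf::size_type>(num_rows, stream);
}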
39 changes: 39 additions & 0 deletions cpp/include/cudf/lists/detail/sorting.hpp
@@ -0,0 +1,39 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once

#include <cudf/lists/lists_column_view.hpp>

#include <rmm/cuda_stream_view.hpp>

namespace cudf {
namespace lists {
namespace detail {

/**
* @copydoc cudf::lists::sort_lists
*
* @param stream CUDA stream used for device memory operations and kernel launches.
*/
std::unique_ptr<column> sort_lists(
lists_column_view const& input,
order column_order,
null_order null_precedence,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
} // namespace detail
} // namespace lists
} // namespace cudf
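A brief, illustrative call of the new detail overload (not from this commit), which mirrors the public cudf::lists::sort_lists API but takes an explicit CUDA stream:

// Hedged sketch: sort the elements inside each list on a caller-provided stream.
#include <cudf/column/column.hpp>
#include <cudf/lists/detail/sorting.hpp>
#include <cudf/lists/lists_column_view.hpp>

#include <rmm/cuda_stream_view.hpp>

#include <memory>

std::unique_ptr<cudf::column> sort_each_list(cudf::lists_column_view const& input,
                                             rmm::cuda_stream_view stream)
{
  // Ascending order within each list, nulls placed after non-null elements;
  // the default memory resource is used because `mr` is defaulted.
  return cudf::lists::detail::sort_lists(
    input, cudf::order::ASCENDING, cudf::null_order::AFTER, stream);
}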