Merge branch 'branch-22.04' into pyupgrade
bdice authored Feb 3, 2022
2 parents 0c7d002 + 0581975 commit 8d33659
Showing 14 changed files with 476 additions and 99 deletions.
238 changes: 237 additions & 1 deletion CHANGELOG.md

Large diffs are not rendered by default.

20 changes: 9 additions & 11 deletions build.sh
@@ -185,12 +185,9 @@ if buildAll || hasArg libcudf; then
fi

# get the current count before the compile starts
-FILES_IN_CCACHE=""
-if [[ "$BUILD_REPORT_INCL_CACHE_STATS" == "ON" && -x "$(command -v ccache)" ]]; then
-    FILES_IN_CCACHE=$(ccache -s | grep "files in cache")
-    echo "$FILES_IN_CCACHE"
-    # zero the ccache statistics
-    ccache -z
+if [[ "$BUILD_REPORT_INCL_CACHE_STATS" == "ON" && -x "$(command -v sccache)" ]]; then
+    # zero the sccache statistics
+    sccache --zero-stats
fi

cmake -S $REPODIR/cpp -B ${LIB_BUILD_DIR} \
@@ -216,11 +213,12 @@ if buildAll || hasArg libcudf; then
echo "Formatting build metrics"
python ${REPODIR}/cpp/scripts/sort_ninja_log.py ${LIB_BUILD_DIR}/.ninja_log --fmt xml > ${LIB_BUILD_DIR}/ninja_log.xml
MSG="<p>"
-# get some ccache stats after the compile
-if [[ "$BUILD_REPORT_INCL_CACHE_STATS"=="ON" && -x "$(command -v ccache)" ]]; then
-    MSG="${MSG}<br/>$FILES_IN_CCACHE"
-    HIT_RATE=$(ccache -s | grep "cache hit rate")
-    MSG="${MSG}<br/>${HIT_RATE}"
+# get some sccache stats after the compile
+if [[ "$BUILD_REPORT_INCL_CACHE_STATS" == "ON" && -x "$(command -v sccache)" ]]; then
+    COMPILE_REQUESTS=$(sccache -s | grep "Compile requests \+ [0-9]\+$" | awk '{ print $NF }')
+    CACHE_HITS=$(sccache -s | grep "Cache hits \+ [0-9]\+$" | awk '{ print $NF }')
+    HIT_RATE=$(echo - | awk "{printf \"%.2f\n\", $CACHE_HITS / $COMPILE_REQUESTS * 100}")
+    MSG="${MSG}<br/>cache hit rate ${HIT_RATE} %"
fi
MSG="${MSG}<br/>parallel setting: $PARALLEL_LEVEL"
MSG="${MSG}<br/>parallel build time: $compile_total seconds"
6 changes: 6 additions & 0 deletions ci/cpu/build.sh
@@ -31,6 +31,10 @@ if [[ "$BUILD_MODE" = "branch" && "$SOURCE_BRANCH" = branch-* ]] ; then
export VERSION_SUFFIX=`date +%y%m%d`
fi

+export CMAKE_CUDA_COMPILER_LAUNCHER="sccache"
+export CMAKE_CXX_COMPILER_LAUNCHER="sccache"
+export CMAKE_C_COMPILER_LAUNCHER="sccache"

################################################################################
# SETUP - Check environment
################################################################################
@@ -77,6 +81,8 @@ if [ "$BUILD_LIBCUDF" == '1' ]; then
gpuci_conda_retry build --no-build-id --croot ${CONDA_BLD_DIR} conda/recipes/libcudf $CONDA_BUILD_ARGS
mkdir -p ${CONDA_BLD_DIR}/libcudf/work
cp -r ${CONDA_BLD_DIR}/work/* ${CONDA_BLD_DIR}/libcudf/work
+    gpuci_logger "sccache stats"
+    sccache --show-stats

# Copy libcudf build metrics results
LIBCUDF_BUILD_DIR=$CONDA_BLD_DIR/libcudf/work/cpp/build
4 changes: 4 additions & 0 deletions ci/gpu/build.sh
@@ -36,6 +36,10 @@ export DASK_DISTRIBUTED_GIT_TAG='2022.01.0'
# ucx-py version
export UCX_PY_VERSION='0.25.*'

+export CMAKE_CUDA_COMPILER_LAUNCHER="sccache"
+export CMAKE_CXX_COMPILER_LAUNCHER="sccache"
+export CMAKE_C_COMPILER_LAUNCHER="sccache"

################################################################################
# TRAP - Setup trap for removing jitify cache
################################################################################
8 changes: 5 additions & 3 deletions conda/recipes/libcudf/meta.yaml
@@ -22,13 +22,15 @@ build:
- PARALLEL_LEVEL
- VERSION_SUFFIX
- PROJECT_FLASH
-    - CCACHE_DIR
-    - CCACHE_NOHASHDIR
-    - CCACHE_COMPILERCHECK
- CMAKE_GENERATOR
- CMAKE_C_COMPILER_LAUNCHER
- CMAKE_CXX_COMPILER_LAUNCHER
- CMAKE_CUDA_COMPILER_LAUNCHER
+    - SCCACHE_S3_KEY_PREFIX=libcudf-aarch64 # [aarch64]
+    - SCCACHE_S3_KEY_PREFIX=libcudf-linux64 # [linux64]
+    - SCCACHE_BUCKET=rapids-sccache
+    - SCCACHE_REGION=us-west-2
+    - SCCACHE_IDLE_TIMEOUT=32768
run_exports:
- {{ pin_subpackage("libcudf", max_pin="x.x") }}

2 changes: 1 addition & 1 deletion cpp/include/cudf/binaryop.hpp
@@ -45,7 +45,7 @@ enum class binary_operator : int32_t {
PMOD, ///< positive modulo operator
///< If remainder is negative, this returns (remainder + divisor) % divisor
///< else, it returns (dividend % divisor)
-PYMOD, ///< operator % but following python's sign rules for negatives
+PYMOD, ///< operator % but following Python's sign rules for negatives
POW, ///< lhs ^ rhs
LOG_BASE, ///< logarithm to the base
ATAN2, ///< 2-argument arctangent
27 changes: 26 additions & 1 deletion cpp/include/cudf/fixed_point/fixed_point.hpp
@@ -1,5 +1,5 @@
/*
-* Copyright (c) 2020-2021, NVIDIA CORPORATION.
+* Copyright (c) 2020-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -440,6 +440,21 @@ class fixed_point {
CUDF_HOST_DEVICE inline friend fixed_point<Rep1, Rad1> operator/(
fixed_point<Rep1, Rad1> const& lhs, fixed_point<Rep1, Rad1> const& rhs);

+  /**
+   * @brief operator % (for computing the modulo operation of two `fixed_point` numbers)
+   *
+   * If `_scale`s are equal, the modulus is computed directly.
+   * If `_scale`s are not equal, the number with larger `_scale` is shifted to the
+   * smaller `_scale`, and then the modulus is computed.
+   *
+   * @tparam Rep1 Representation type of number being modulo-ed to `this`
+   * @tparam Rad1 Radix (base) type of number being modulo-ed to `this`
+   * @return The resulting `fixed_point` number
+   */
+  template <typename Rep1, Radix Rad1>
+  CUDF_HOST_DEVICE inline friend fixed_point<Rep1, Rad1> operator%(
+    fixed_point<Rep1, Rad1> const& lhs, fixed_point<Rep1, Rad1> const& rhs);

/**
* @brief operator == (for comparing two `fixed_point` numbers)
*
@@ -750,6 +765,16 @@ CUDF_HOST_DEVICE inline bool operator>(fixed_point<Rep1, Rad1> const& lhs,
return lhs.rescaled(scale)._value > rhs.rescaled(scale)._value;
}

+// MODULO OPERATION
+template <typename Rep1, Radix Rad1>
+CUDF_HOST_DEVICE inline fixed_point<Rep1, Rad1> operator%(fixed_point<Rep1, Rad1> const& lhs,
+                                                          fixed_point<Rep1, Rad1> const& rhs)
+{
+  auto const scale     = std::min(lhs._scale, rhs._scale);
+  auto const remainder = lhs.rescaled(scale)._value % rhs.rescaled(scale)._value;
+  return fixed_point<Rep1, Rad1>{scaled_integer<Rep1>{remainder, scale}};
+}

using decimal32 = fixed_point<int32_t, Radix::BASE_10>;
using decimal64 = fixed_point<int64_t, Radix::BASE_10>;
using decimal128 = fixed_point<__int128_t, Radix::BASE_10>;
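Editor's note on the new operator%: as the doc comment above describes, both operands are first rescaled to the smaller (more precise) `_scale` and the integer remainder of the underlying representations is taken. The following host-only sketch is not part of this commit; the literal values, the main() wrapper, and the assert are illustrative assumptions layered on the public numeric::decimal32 / scaled_integer API shown in the diff.

// Illustrative sketch only: 7.50 % 2.0 with mismatched scales.
#include <cudf/fixed_point/fixed_point.hpp>
#include <cassert>

int main()
{
  using namespace numeric;
  auto const a = decimal32{scaled_integer<int32_t>{750, scale_type{-2}}};  // 7.50
  auto const b = decimal32{scaled_integer<int32_t>{20, scale_type{-1}}};   // 2.0
  // Both operands are rescaled to scale -2, so the remainder is 750 % 200 = 150, i.e. 1.50.
  auto const r = a % b;
  assert(r == decimal32{scaled_integer<int32_t>{150, scale_type{-2}}});
  return 0;
}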
5 changes: 4 additions & 1 deletion cpp/src/binaryop/binaryop.cpp
@@ -88,7 +88,10 @@ bool is_basic_arithmetic_binop(binary_operator op)
op == binary_operator::MUL or // operator *
op == binary_operator::DIV or // operator / using common type of lhs and rhs
op == binary_operator::NULL_MIN or // 2 null = null, 1 null = value, else min
-op == binary_operator::NULL_MAX; // 2 null = null, 1 null = value, else max
+op == binary_operator::NULL_MAX or // 2 null = null, 1 null = value, else max
+op == binary_operator::MOD or // operator %
+op == binary_operator::PMOD or // positive modulo operator
+op == binary_operator::PYMOD; // operator % but following Python's negative sign rules
}

/**
14 changes: 13 additions & 1 deletion cpp/src/binaryop/compiled/operation.cuh
@@ -162,12 +162,24 @@ struct PMod {
if (rem < 0) rem = std::fmod(rem + yconv, yconv);
return rem;
}

+  template <typename TypeLhs,
+            typename TypeRhs,
+            std::enable_if_t<cudf::is_fixed_point<TypeLhs>() and
+                             std::is_same_v<TypeLhs, TypeRhs>>* = nullptr>
+  __device__ inline auto operator()(TypeLhs x, TypeRhs y)
+  {
+    auto const remainder = x % y;
+    return remainder.value() < 0 ? (remainder + y) % y : remainder;
+  }
};

struct PyMod {
template <typename TypeLhs,
typename TypeRhs,
-            std::enable_if_t<(std::is_integral_v<std::common_type_t<TypeLhs, TypeRhs>>)>* = nullptr>
+            std::enable_if_t<(std::is_integral_v<std::common_type_t<TypeLhs, TypeRhs>> or
+                              (cudf::is_fixed_point<TypeLhs>() and
+                               std::is_same_v<TypeLhs, TypeRhs>))>* = nullptr>
__device__ inline auto operator()(TypeLhs x, TypeRhs y) -> decltype(((x % y) + y) % y)
{
return ((x % y) + y) % y;
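Editor's note on the PMod / PyMod changes above: both functors are extended to fixed_point operands, and the fixed-point PMod specialization adds the divisor back (then re-mods) whenever the plain remainder is negative. As a reminder of the sign conventions involved, here is a standalone host sketch, not taken from the commit, showing how plain C++ %, a PMOD-style result, and a PYMOD-style result differ only for negative inputs:

#include <cstdio>

int main()
{
  int const x = -7, y = 3;
  int const c_mod = x % y;                          // truncated C/C++ remainder: -1
  int const pmod  = c_mod < 0 ? c_mod + y : c_mod;  // PMOD-style: shift a negative remainder by y -> 2
  int const pymod = ((x % y) + y) % y;              // PYMOD-style, matching Python's -7 % 3 -> 2
  std::printf("MOD=%d PMOD=%d PYMOD=%d\n", c_mod, pmod, pymod);
  return 0;
}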
6 changes: 5 additions & 1 deletion cpp/src/binaryop/compiled/util.cpp
@@ -45,7 +45,11 @@ struct common_type_functor {
// Eg. d=t-t
return data_type{type_to_id<TypeCommon>()};
}
-    return {};
+
+    // A compiler bug may cause a compilation error when using empty initializer list to construct
+    // an std::optional object containing no `data_type` value. Therefore, we should explicitly
+    // return `std::nullopt` instead.
+    return std::nullopt;
}
};
template <typename TypeLhs, typename TypeRhs>