Merge branch 'branch-22.02' of github.com:rapidsai/cudf into improvem…

…ent/use_list_of_columns_apply_boolean_mask
rapidsai · Jan 10, 2022 · 33be855 · 33be855
2 parents 7d30dc8 + 0d5ec7f
commit 33be855
Show file tree

Hide file tree

Showing 37 changed files with 2,725 additions and 579 deletions.
diff --git a/build.sh b/build.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
 
-# Copyright (c) 2019-2021, NVIDIA CORPORATION.
+# Copyright (c) 2019-2022, NVIDIA CORPORATION.
 
 # cuDF build script
 
@@ -17,7 +17,7 @@ ARGS=$*
 # script, and that this script resides in the repo dir!
 REPODIR=$(cd $(dirname $0); pwd)
 
-VALIDARGS="clean libcudf cudf dask_cudf benchmarks tests libcudf_kafka cudf_kafka custreamz -v -g -n -l --allgpuarch --disable_nvtx --show_depr_warn --ptds -h"
+VALIDARGS="clean libcudf cudf dask_cudf benchmarks tests libcudf_kafka cudf_kafka custreamz -v -g -n -l --allgpuarch --disable_nvtx --show_depr_warn --ptds -h --build_metrics --incl_cache_stats"
 HELP="$0 [clean] [libcudf] [cudf] [dask_cudf] [benchmarks] [tests] [libcudf_kafka] [cudf_kafka] [custreamz] [-v] [-g] [-n] [-h] [-l] [--cmake-args=\\\"<args>\\\"]
    clean                         - remove all existing build artifacts and configuration (start
                                    over)
@@ -37,6 +37,8 @@ HELP="$0 [clean] [libcudf] [cudf] [dask_cudf] [benchmarks] [tests] [libcudf_kafk
    --disable_nvtx                - disable inserting NVTX profiling ranges
    --show_depr_warn              - show cmake deprecation warnings
    --ptds                        - enable per-thread default stream
+   --build_metrics               - generate build metrics report for libcudf
+   --incl_cache_stats            - include cache statistics in build metrics report
    --cmake-args=\\\"<args>\\\"   - pass arbitrary list of CMake configuration options (escape all quotes in argument)
    -h | --h[elp]                 - print this text
 
@@ -61,6 +63,8 @@ BUILD_NVTX=ON
 BUILD_TESTS=OFF
 BUILD_DISABLE_DEPRECATION_WARNING=ON
 BUILD_PER_THREAD_DEFAULT_STREAM=OFF
+BUILD_REPORT_METRICS=OFF
+BUILD_REPORT_INCL_CACHE_STATS=OFF
 
 # Set defaults for vars that may not have been defined externally
 #  FIXME: if INSTALL_PREFIX is not set, check PREFIX, then check
@@ -144,6 +148,14 @@ fi
 if hasArg --ptds; then
     BUILD_PER_THREAD_DEFAULT_STREAM=ON
 fi
+if hasArg --build_metrics; then
+    BUILD_REPORT_METRICS=ON
+fi
+
+if hasArg --incl_cache_stats; then
+    BUILD_REPORT_INCL_CACHE_STATS=ON
+fi
+
 
 # If clean given, run it prior to any other steps
 if hasArg clean; then
@@ -174,8 +186,11 @@ if buildAll || hasArg libcudf; then
 
     # get the current count before the compile starts
     FILES_IN_CCACHE=""
-    if [ -x "$(command -v ccache)" ]; then
+    if [[ "$BUILD_REPORT_INCL_CACHE_STATS"=="ON" && -x "$(command -v ccache)" ]]; then
         FILES_IN_CCACHE=$(ccache -s | grep "files in cache")
+        echo "$FILES_IN_CCACHE"
+        # zero the ccache statistics
+        ccache -z
     fi
 
     cmake -S $REPODIR/cpp -B ${LIB_BUILD_DIR} \
@@ -197,12 +212,24 @@ if buildAll || hasArg libcudf; then
     compile_total=$(( compile_end - compile_start ))
 
     # Record build times
-    if [[ -f "${LIB_BUILD_DIR}/.ninja_log" ]]; then
-        echo "Formatting build times"
+    if [[ "$BUILD_REPORT_METRICS"=="ON" && -f "${LIB_BUILD_DIR}/.ninja_log" ]]; then
+        echo "Formatting build metrics"
         python ${REPODIR}/cpp/scripts/sort_ninja_log.py ${LIB_BUILD_DIR}/.ninja_log --fmt xml > ${LIB_BUILD_DIR}/ninja_log.xml
-        message="$FILES_IN_CCACHE <p>$PARALLEL_LEVEL parallel build time is $compile_total seconds"
-        echo "$message"
-        python ${REPODIR}/cpp/scripts/sort_ninja_log.py ${LIB_BUILD_DIR}/.ninja_log --fmt html --msg "$message" > ${LIB_BUILD_DIR}/ninja_log.html
+        MSG="<p>"
+        # get some ccache stats after the compile
+        if [[ "$BUILD_REPORT_INCL_CACHE_STATS"=="ON" && -x "$(command -v ccache)" ]]; then
+           MSG="${MSG}<br/>$FILES_IN_CCACHE"
+           HIT_RATE=$(ccache -s | grep "cache hit rate")
+           MSG="${MSG}<br/>${HIT_RATE}"
+        fi
+        MSG="${MSG}<br/>parallel setting: $PARALLEL_LEVEL"
+        MSG="${MSG}<br/>parallel build time: $compile_total seconds"
+        if [[ -f "${LIB_BUILD_DIR}/libcudf.so" ]]; then
+           LIBCUDF_FS=$(ls -lh ${LIB_BUILD_DIR}/libcudf.so | awk '{print $5}')
+           MSG="${MSG}<br/>libcudf.so size: $LIBCUDF_FS"
+        fi
+        echo "$MSG"
+        python ${REPODIR}/cpp/scripts/sort_ninja_log.py ${LIB_BUILD_DIR}/.ninja_log --fmt html --msg "$MSG" > ${LIB_BUILD_DIR}/ninja_log.html
     fi
 
     if [[ ${INSTALL_TARGET} != "" ]]; then

diff --git a/ci/cpu/build.sh b/ci/cpu/build.sh
@@ -78,6 +78,14 @@ if [ "$BUILD_LIBCUDF" == '1' ]; then
   mkdir -p ${CONDA_BLD_DIR}/libcudf/work
   cp -r ${CONDA_BLD_DIR}/work/* ${CONDA_BLD_DIR}/libcudf/work
 
+  # Copy libcudf build metrics results
+  LIBCUDF_BUILD_DIR=$CONDA_BLD_DIR/libcudf/work/cpp/build
+  echo "Checking for build metrics log $LIBCUDF_BUILD_DIR/ninja_log.html"
+  if [[ -f "$LIBCUDF_BUILD_DIR/ninja_log.html" ]]; then
+      gpuci_logger "Copying build metrics results"
+      mkdir -p "$WORKSPACE/build-metrics"
+      cp "$LIBCUDF_BUILD_DIR/ninja_log.html" "$WORKSPACE/build-metrics/BuildMetrics.html"
+  fi
 
   gpuci_logger "Build conda pkg for libcudf_kafka"
   gpuci_conda_retry build --no-build-id --croot ${CONDA_BLD_DIR} conda/recipes/libcudf_kafka $CONDA_BUILD_ARGS

diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh
@@ -181,12 +181,10 @@ else
     done
 
     # Copy libcudf build time results
-    echo "Checking for build time log $LIB_BUILD_DIR/ninja_log.html"
-    if [[ -f "$LIB_BUILD_DIR/ninja_log.html" ]]; then
+    echo "Checking for build time log $LIB_BUILD_DIR/ninja_log.xml"
+    if [[ -f "$LIB_BUILD_DIR/ninja_log.xml" ]]; then
         gpuci_logger "Copying build time results"
         cp "$LIB_BUILD_DIR/ninja_log.xml" "$WORKSPACE/test-results/buildtimes-junit.xml"
-        mkdir -p "$WORKSPACE/build-metrics"
-        cp "$LIB_BUILD_DIR/ninja_log.html" "$WORKSPACE/build-metrics/BuildMetrics.html"
     fi
 
     ################################################################################

diff --git a/ci/gpu/java.sh b/ci/gpu/java.sh
@@ -77,7 +77,7 @@ conda config --show-sources
 conda list --show-channel-urls
 
 gpuci_logger "Install dependencies"
-gpuci_conda_retry install -y \
+gpuci_mamba_retry install -y \
                   "cudatoolkit=$CUDA_REL" \
                   "rapids-build-env=$MINOR_VERSION.*" \
                   "rapids-notebook-env=$MINOR_VERSION.*" \
@@ -86,10 +86,14 @@ gpuci_conda_retry install -y \
                   "ucx-py=${UCX_PY_VERSION}" \
                   "openjdk=8.*" \
                   "maven"
+# "mamba install openjdk" adds an activation script to set JAVA_HOME but this is
+# not triggered on installation. Re-activating the conda environment will set
+# this environment variable so that CMake can find JNI.
+conda activate rapids
 
 # https://docs.rapids.ai/maintainers/depmgmt/
 # gpuci_conda_retry remove --force rapids-build-env rapids-notebook-env
-# gpuci_conda_retry install -y "your-pkg=1.0.0"
+# gpuci_mamba_retry install -y "your-pkg=1.0.0"
 
 
 gpuci_logger "Check compiler versions"
@@ -130,7 +134,7 @@ KAFKA_CONDA_FILE=`basename "$KAFKA_CONDA_FILE" .tar.bz2` #get filename without e
 KAFKA_CONDA_FILE=${KAFKA_CONDA_FILE//-/=} #convert to conda install
 
 gpuci_logger "Installing $CUDF_CONDA_FILE & $KAFKA_CONDA_FILE"
-conda install -c ${CONDA_ARTIFACT_PATH} "$CUDF_CONDA_FILE" "$KAFKA_CONDA_FILE"
+gpuci_mamba_retry install -c ${CONDA_ARTIFACT_PATH} "$CUDF_CONDA_FILE" "$KAFKA_CONDA_FILE"
 
 install_dask
 

diff --git a/conda/recipes/libcudf/build.sh b/conda/recipes/libcudf/build.sh
@@ -4,5 +4,5 @@ if [[ -z "$PROJECT_FLASH" || "$PROJECT_FLASH" == "0" ]]; then
     # This assumes the script is executed from the root of the repo directory
     ./build.sh -v libcudf --allgpuarch --cmake-args=\"-DCMAKE_INSTALL_LIBDIR=lib\"
 else
-    ./build.sh -v libcudf tests --allgpuarch --cmake-args=\"-DCMAKE_INSTALL_LIBDIR=lib\"
+    ./build.sh -v libcudf tests --allgpuarch --build_metrics --incl_cache_stats --cmake-args=\"-DCMAKE_INSTALL_LIBDIR=lib\"
 fi
diff --git a/cpp/include/cudf/detail/utilities/integer_utils.hpp b/cpp/include/cudf/detail/utilities/integer_utils.hpp
@@ -1,7 +1,7 @@
 /*
  * Copyright 2019 BlazingDB, Inc.
  *     Copyright 2019 Eyal Rozenberg <[email protected]>
- * Copyright (c) 2020, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -33,12 +33,18 @@ namespace cudf {
 //! Utility functions
 namespace util {
 /**
- * Finds the smallest integer not less than `number_to_round` and modulo `S` is
- * zero. This function assumes that `number_to_round` is non-negative and
- * `modulus` is positive.
+ * @brief Rounds `number_to_round` up to the next multiple of modulus
+ *
+ * @tparam S type to return
+ * @param number_to_round number that is being rounded
+ * @param modulus value to which to round
+ * @return smallest integer greater than `number_to_round` and modulo `S` is zero.
+ *
+ * @note This function assumes that `number_to_round` is non-negative and
+ * `modulus` is positive. The safety is in regard to rollover.
  */
 template <typename S>
-inline S round_up_safe(S number_to_round, S modulus)
+S round_up_safe(S number_to_round, S modulus)
 {
   auto remainder = number_to_round % modulus;
   if (remainder == 0) { return number_to_round; }
@@ -50,18 +56,44 @@ inline S round_up_safe(S number_to_round, S modulus)
 }
 
 /**
- * Finds the largest integer not greater than `number_to_round` and modulo `S` is
- * zero. This function assumes that `number_to_round` is non-negative and
- * `modulus` is positive.
+ * @brief Rounds `number_to_round` down to the last multiple of modulus
+ *
+ * @tparam S type to return
+ * @param number_to_round number that is being rounded
+ * @param modulus value to which to round
+ * @return largest integer not greater than `number_to_round` and modulo `S` is zero.
+ *
+ * @note This function assumes that `number_to_round` is non-negative and
+ * `modulus` is positive and does not check for overflow.
  */
 template <typename S>
-inline S round_down_safe(S number_to_round, S modulus)
+S round_down_safe(S number_to_round, S modulus) noexcept
 {
   auto remainder    = number_to_round % modulus;
   auto rounded_down = number_to_round - remainder;
   return rounded_down;
 }
 
+/**
+ * @brief Rounds `number_to_round` up to the next multiple of modulus
+ *
+ * @tparam S type to return
+ * @param number_to_round number that is being rounded
+ * @param modulus value to which to round
+ * @return smallest integer greater than `number_to_round` and modulo `S` is zero.
+ *
+ * @note This function assumes that `number_to_round` is non-negative and
+ * `modulus` is positive and does not check for overflow.
+ */
+template <typename S>
+constexpr S round_up_unsafe(S number_to_round, S modulus) noexcept
+{
+  auto remainder = number_to_round % modulus;
+  if (remainder == 0) { return number_to_round; }
+  auto rounded_up = number_to_round - remainder + modulus;
+  return rounded_up;
+}
+
 /**
  * Divides the left-hand-side by the right-hand-side, rounding up
  * to an integral multiple of the right-hand-side, e.g. (9,5) -> 2 , (10,5) -> 2, (11,5) -> 3.
@@ -75,26 +107,24 @@ inline S round_down_safe(S number_to_round, S modulus)
  * the result will be incorrect
  */
 template <typename S, typename T>
-constexpr inline S div_rounding_up_unsafe(const S& dividend, const T& divisor) noexcept
+constexpr S div_rounding_up_unsafe(const S& dividend, const T& divisor) noexcept
 {
   return (dividend + divisor - 1) / divisor;
 }
 
 namespace detail {
 template <typename I>
-constexpr inline I div_rounding_up_safe(std::integral_constant<bool, false>,
-                                        I dividend,
-                                        I divisor) noexcept
+constexpr I div_rounding_up_safe(std::integral_constant<bool, false>,
+                                 I dividend,
+                                 I divisor) noexcept
 {
   // TODO: This could probably be implemented faster
   return (dividend > divisor) ? 1 + div_rounding_up_unsafe(dividend - divisor, divisor)
                               : (dividend > 0);
 }
 
 template <typename I>
-constexpr inline I div_rounding_up_safe(std::integral_constant<bool, true>,
-                                        I dividend,
-                                        I divisor) noexcept
+constexpr I div_rounding_up_safe(std::integral_constant<bool, true>, I dividend, I divisor) noexcept
 {
   auto quotient  = dividend / divisor;
   auto remainder = dividend % divisor;
@@ -116,14 +146,14 @@ constexpr inline I div_rounding_up_safe(std::integral_constant<bool, true>,
  * approach of using (dividend + divisor - 1) / divisor
  */
 template <typename I>
-constexpr inline I div_rounding_up_safe(I dividend, I divisor) noexcept
+constexpr I div_rounding_up_safe(I dividend, I divisor) noexcept
 {
   using i_is_a_signed_type = std::integral_constant<bool, std::is_signed<I>::value>;
   return detail::div_rounding_up_safe(i_is_a_signed_type{}, dividend, divisor);
 }
 
 template <typename I>
-constexpr inline bool is_a_power_of_two(I val) noexcept
+constexpr bool is_a_power_of_two(I val) noexcept
 {
   static_assert(std::is_integral<I>::value, "This function only applies to integral types");
   return ((val - 1) & val) == 0;
@@ -153,7 +183,7 @@ constexpr inline bool is_a_power_of_two(I val) noexcept
  * @return Absolute value if value type is signed.
  */
 template <typename T>
-constexpr inline auto absolute_value(T value) -> T
+constexpr auto absolute_value(T value) -> T
 {
   if constexpr (cuda::std::is_signed<T>()) return numeric::detail::abs(value);
   return value;

diff --git a/cpp/include/cudf/io/types.hpp b/cpp/include/cudf/io/types.hpp
@@ -416,6 +416,11 @@ class table_input_metadata {
 struct partition_info {
   size_type start_row;
   size_type num_rows;
+
+  partition_info() = default;
+  partition_info(size_type start_row, size_type num_rows) : start_row(start_row), num_rows(num_rows)
+  {
+  }
 };
 
 }  // namespace io

diff --git a/cpp/scripts/sort_ninja_log.py b/cpp/scripts/sort_ninja_log.py
@@ -88,21 +88,38 @@
 elif output_fmt == "html":
     # output results in HTML format
     print("<html><head><title>Sorted Ninja Build Times</title>")
-    print("<style>", "table, th, td { border:1px solid black; }", "</style>")
+    # Note: Jenkins does not support style defined in the html
+    # https://www.jenkins.io/doc/book/security/configuring-content-security-policy/
     print("</head><body>")
     if args.msg is not None:
         print("<p>", args.msg, "</p>")
     print("<table>")
     print(
         "<tr><th>File</th>",
-        "<th align='right'>Compile time (ms)</th>",
-        "<th align='right'>Size (bytes)</th><tr>",
+        "<th>Compile time<br/>(ms)</th>",
+        "<th>Size<br/>(bytes)</th><tr>",
         sep="",
     )
+    summary = {"red": 0, "yellow": 0, "green": 0}
+    red = "bgcolor='#FFBBD0'"
+    yellow = "bgcolor='#FFFF80'"
+    green = "bgcolor='#AAFFBD'"
     for key in sl:
         result = entries[key]
+        elapsed = result[0]
+        color = green
+        if elapsed > 300000:  # 5 minutes
+            color = red
+            summary["red"] += 1
+        elif elapsed > 120000:  # 2 minutes
+            color = yellow
+            summary["yellow"] += 1
+        else:
+            summary["green"] += 1
         print(
-            "<tr><td>",
+            "<tr ",
+            color,
+            "><td>",
             key,
             "</td><td align='right'>",
             result[0],
@@ -111,6 +128,14 @@
             "</td></tr>",
             sep="",
         )
+    print("</table><br/><table border='2'>")
+    # include summary table with color legend
+    print("<tr><td", red, ">time &gt; 5 minutes</td>")
+    print("<td align='right'>", summary["red"], "</td></tr>")
+    print("<tr><td", yellow, ">2 minutes &lt; time &lt; 5 minutes</td>")
+    print("<td align='right'>", summary["yellow"], "</td></tr>")
+    print("<tr><td", green, ">time &lt; 2 minutes</td>")
+    print("<td align='right'>", summary["green"], "</td></tr>")
     print("</table></body></html>")
 
 else:

diff --git a/cpp/src/copying/contiguous_split.cu b/cpp/src/copying/contiguous_split.cu
@@ -40,13 +40,6 @@ namespace {
 // align all column size allocations to this boundary so that all output column buffers
 // start at that alignment.
 static constexpr std::size_t split_align = 64;
-inline __device__ std::size_t _round_up_safe(std::size_t number_to_round, std::size_t modulus)
-{
-  auto remainder = number_to_round % modulus;
-  if (remainder == 0) { return number_to_round; }
-  auto rounded_up = number_to_round - remainder + modulus;
-  return rounded_up;
-}
 
 /**
  * @brief Struct which contains information on a source buffer.
@@ -960,7 +953,7 @@ std::vector<packed_table> contiguous_split(cudf::table_view const& input,
       std::size_t const bytes =
         static_cast<std::size_t>(num_elements) * static_cast<std::size_t>(element_size);
 
-      return dst_buf_info{_round_up_safe(bytes, 64),
+      return dst_buf_info{util::round_up_unsafe(bytes, 64ul),
                           num_elements,
                           element_size,
                           num_rows,