Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/branch-22.02' into 9822
Browse files — browse the repository at this point in the history
  • Loading branch information
galipremsagar committed Dec 14, 2021
2 parents 3a13ed8 + 7a23f1a commit a775029
Show file tree
Hide file tree
Showing 29 changed files with 584 additions and 190 deletions.
18 changes: 18 additions & 0 deletions build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,12 @@ if buildAll || hasArg libcudf; then
echo "Building for *ALL* supported GPU architectures..."
fi

# get the current count before the compile starts
FILES_IN_CCACHE=""
if [ -x "$(command -v ccache)" ]; then
FILES_IN_CCACHE=$(ccache -s | grep "files in cache")
fi

cmake -S $REPODIR/cpp -B ${LIB_BUILD_DIR} \
-DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} \
${CUDF_CMAKE_CUDA_ARCHITECTURES} \
Expand All @@ -185,7 +191,19 @@ if buildAll || hasArg libcudf; then

cd ${LIB_BUILD_DIR}

compile_start=$(date +%s)
cmake --build . -j${PARALLEL_LEVEL} ${VERBOSE_FLAG}
compile_end=$(date +%s)
compile_total=$(( compile_end - compile_start ))

# Record build times
if [[ -f "${LIB_BUILD_DIR}/.ninja_log" ]]; then
echo "Formatting build times"
python ${REPODIR}/cpp/scripts/sort_ninja_log.py ${LIB_BUILD_DIR}/.ninja_log --fmt xml > ${LIB_BUILD_DIR}/ninja_log.xml
message="$FILES_IN_CCACHE <p>$PARALLEL_LEVEL parallel build time is $compile_total seconds"
echo "$message"
python ${REPODIR}/cpp/scripts/sort_ninja_log.py ${LIB_BUILD_DIR}/.ninja_log --fmt html --msg "$message" > ${LIB_BUILD_DIR}/ninja_log.html
fi

if [[ ${INSTALL_TARGET} != "" ]]; then
cmake --build . -j${PARALLEL_LEVEL} --target install ${VERBOSE_FLAG}
Expand Down
12 changes: 11 additions & 1 deletion ci/gpu/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -166,16 +166,26 @@ else
gpuci_logger "Check GPU usage"
nvidia-smi

gpuci_logger "GoogleTests"
set -x
cd $LIB_BUILD_DIR

gpuci_logger "GoogleTests"

for gt in gtests/* ; do
test_name=$(basename ${gt})
echo "Running GoogleTest $test_name"
${gt} --gtest_output=xml:"$WORKSPACE/test-results/"
done

# Copy libcudf build time results
echo "Checking for build time log $LIB_BUILD_DIR/ninja_log.html"
if [[ -f "$LIB_BUILD_DIR/ninja_log.html" ]]; then
gpuci_logger "Copying build time results"
cp "$LIB_BUILD_DIR/ninja_log.xml" "$WORKSPACE/test-results/buildtimes-junit.xml"
mkdir -p "$WORKSPACE/build-metrics"
cp "$LIB_BUILD_DIR/ninja_log.html" "$WORKSPACE/build-metrics/BuildMetrics.html"
fi

################################################################################
# MEMCHECK - Run compute-sanitizer on GoogleTest (only in nightly builds)
################################################################################
Expand Down
8 changes: 6 additions & 2 deletions cpp/benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -171,8 +171,12 @@ ConfigureBench(FILL_BENCH filling/repeat_benchmark.cpp)
# ##################################################################################################
# * groupby benchmark -----------------------------------------------------------------------------
ConfigureBench(
GROUPBY_BENCH groupby/group_sum_benchmark.cu groupby/group_nth_benchmark.cu
groupby/group_shift_benchmark.cu groupby/group_struct_benchmark.cu
GROUPBY_BENCH
groupby/group_sum_benchmark.cu
groupby/group_nth_benchmark.cu
groupby/group_shift_benchmark.cu
groupby/group_struct_benchmark.cu
groupby/group_no_requests_benchmark.cu
)

# ##################################################################################################
Expand Down
115 changes: 115 additions & 0 deletions cpp/benchmarks/groupby/group_no_requests_benchmark.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <cudf/copying.hpp>
#include <cudf/detail/aggregation/aggregation.hpp>
#include <cudf/groupby.hpp>
#include <cudf/sorting.hpp>
#include <cudf/table/table.hpp>
#include <cudf_test/column_wrapper.hpp>
#include <fixture/benchmark_fixture.hpp>
#include <synchronization/synchronization.hpp>

#include <memory>
#include <random>

// Google-benchmark fixture used by the BENCHMARK_DEFINE_F registrations in
// this file. Inherits cudf::benchmark (from fixture/benchmark_fixture.hpp),
// which presumably prepares device/memory state per run — confirm in fixture.
class Groupby : public cudf::benchmark {
};

/**
 * @brief Generate a pseudo-random integer in the inclusive range [min, max].
 *
 * The engine is seeded once with a fixed seed and shared across calls, so the
 * generated sequence is deterministic for a given call order. The distribution
 * is constructed per call (it is cheap), so every call honors its own
 * [min, max] bounds — the previous `static` distribution silently kept the
 * bounds of the first call, which is what the old TODO was about.
 */
template <typename T>
T random_int(T min, T max)
{
  static constexpr unsigned seed = 13377331;
  static std::mt19937 engine{seed};

  return std::uniform_int_distribution<T>{min, max}(engine);
}

/**
 * @brief Benchmark constructing a groupby object and running an aggregation
 * with an empty request list.
 *
 * With no requests, the measured time is the fixed overhead of groupby
 * construction plus an aggregate() call that has no work to dispatch.
 *
 * @param state Benchmark state; state.range(0) is the number of rows.
 */
void BM_basic_no_requests(benchmark::State& state)
{
  using wrapper = cudf::test::fixed_width_column_wrapper<int64_t>;

  const cudf::size_type column_size{static_cast<cudf::size_type>(state.range(0))};

  // Row index is irrelevant; every element is an independent random draw.
  auto data_it = cudf::detail::make_counting_transform_iterator(
    0, [](cudf::size_type) { return random_int(0, 100); });

  wrapper keys(data_it, data_it + column_size);
  // NOTE(review): vals is never referenced below (requests is empty) —
  // presumably kept to mirror the other groupby benchmarks; confirm.
  wrapper vals(data_it, data_it + column_size);

  std::vector<cudf::groupby::aggregation_request> requests;  // intentionally empty

  for (auto _ : state) {
    cuda_event_timer timer(state, true);  // manual GPU timing per iteration
    cudf::groupby::groupby gb_obj(cudf::table_view({keys}));
    auto result = gb_obj.aggregate(requests);
  }
}

// Bind BM_basic_no_requests to the Groupby fixture.
BENCHMARK_DEFINE_F(Groupby, BasicNoRequest)(::benchmark::State& state)
{
BM_basic_no_requests(state);
}

// UseManualTime: iteration times come from the cuda_event_timer inside the
// benchmark body rather than google-benchmark's own wall clock.
// Each Arg is a row count (10^4 .. 10^8).
BENCHMARK_REGISTER_F(Groupby, BasicNoRequest)
->UseManualTime()
->Unit(benchmark::kMillisecond)
->Arg(10000)
->Arg(1000000)
->Arg(10000000)
->Arg(100000000);

/**
 * @brief Benchmark an empty aggregation over keys declared as pre-sorted.
 *
 * Keys are sorted up front (outside the timed loop) and the groupby is built
 * with cudf::sorted::YES, so the measured time is the overhead of the
 * sorted-keys code path with no aggregation work.
 *
 * @param state Benchmark state; state.range(0) is the number of rows.
 */
void BM_pre_sorted_no_requests(benchmark::State& state)
{
  using wrapper = cudf::test::fixed_width_column_wrapper<int64_t>;

  const cudf::size_type column_size{static_cast<cudf::size_type>(state.range(0))};

  // Row index is irrelevant; every element is an independent random draw.
  auto data_it = cudf::detail::make_counting_transform_iterator(
    0, [](cudf::size_type) { return random_int(0, 100); });
  // ~90% of the value rows are valid (non-null).
  auto valid_it = cudf::detail::make_counting_transform_iterator(
    0, [](cudf::size_type) { return random_int(0, 100) < 90; });

  wrapper keys(data_it, data_it + column_size);
  wrapper vals(data_it, data_it + column_size, valid_it);

  auto keys_table  = cudf::table_view({keys});
  auto sort_order  = cudf::sorted_order(keys_table);
  auto sorted_keys = cudf::gather(keys_table, *sort_order);
  // No need to sort values using sort_order because they were generated randomly

  std::vector<cudf::groupby::aggregation_request> requests;  // intentionally empty

  for (auto _ : state) {
    cuda_event_timer timer(state, true);  // manual GPU timing per iteration
    cudf::groupby::groupby gb_obj(*sorted_keys, cudf::null_policy::EXCLUDE, cudf::sorted::YES);
    auto result = gb_obj.aggregate(requests);
  }
}

// Bind BM_pre_sorted_no_requests to the Groupby fixture.
BENCHMARK_DEFINE_F(Groupby, PreSortedNoRequests)(::benchmark::State& state)
{
BM_pre_sorted_no_requests(state);
}

// UseManualTime: iteration times come from the cuda_event_timer inside the
// benchmark body. Each Arg is a row count (10^6 .. 10^8).
BENCHMARK_REGISTER_F(Groupby, PreSortedNoRequests)
->UseManualTime()
->Unit(benchmark::kMillisecond)
->Arg(1000000)
->Arg(10000000)
->Arg(100000000);
121 changes: 121 additions & 0 deletions cpp/scripts/sort_ninja_log.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
#
# Copyright (c) 2021, NVIDIA CORPORATION.
#
"""Summarize a .ninja_log: sort entries by build time (descending) and emit
the result to stdout as CSV, JUnit-style XML, or an HTML table."""
import argparse
import os
import sys
import xml.etree.ElementTree as ET
from xml.dom import minidom


def parse_args(argv=None):
    # Parse command-line arguments (argv defaults to sys.argv[1:]).
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "log_file", type=str, default=".ninja_log", help=".ninja_log file"
    )
    parser.add_argument(
        "--fmt",
        type=str,
        default="csv",
        choices=["csv", "xml", "html"],
        help="output format (to stdout)",
    )
    parser.add_argument(
        "--msg",
        type=str,
        default=None,
        help="optional message to include in html output",
    )
    return parser.parse_args(argv)


def load_log(log_file):
    """Build a map of {object file: (elapsed ms, size bytes)} from a ninja log.

    Data lines in a ninja log have at least five whitespace-separated fields:
    start-time, end-time, mtime, output-file, command-hash. Shorter lines
    (e.g. the "# ninja log v5" header) are skipped.
    """
    log_path = os.path.dirname(os.path.abspath(log_file))
    entries = {}
    with open(log_file, "r") as log:
        for line in log:
            entry = line.split()
            if len(entry) > 4:
                elapsed = int(entry[1]) - int(entry[0])
                obj_file = entry[3]
                # Output paths in the log are relative to the build dir, not
                # the current working directory: resolve against the log's
                # directory before stat'ing. (The original checked the
                # unresolved path, so sizes were reported as 0 unless the
                # script happened to run from the build dir.)
                obj_path = os.path.join(log_path, obj_file)
                file_size = (
                    os.path.getsize(obj_path) if os.path.exists(obj_path) else 0
                )
                entries[obj_file] = (elapsed, file_size)
    return entries


def sorted_keys(entries):
    # Entry keys sorted by build time, slowest first.
    return sorted(entries.keys(), key=lambda k: entries[k][0], reverse=True)


def output_xml(entries, sl):
    # Emit entries as a JUnit-style XML testsuite (one testcase per file) so
    # CI can render per-file build times as "test" durations.
    root = ET.Element("testsuites")
    testsuite = ET.Element(
        "testsuite",
        attrib={
            "name": "build-time",
            "tests": str(len(entries)),
            "failures": str(0),
            "errors": str(0),
        },
    )
    root.append(testsuite)
    for key in sl:
        entry = entries[key]
        # ninja times are milliseconds; JUnit expects seconds
        elapsed = float(entry[0]) / 1000
        item = ET.Element(
            "testcase",
            attrib={
                "classname": "BuildTime",
                "name": key,
                "time": str(elapsed),
            },
        )
        testsuite.append(item)

    xmlstr = minidom.parseString(ET.tostring(root)).toprettyxml(indent=" ")
    print(xmlstr)


def output_html(entries, sl, msg):
    # Emit entries as a simple sorted HTML table; msg (if any) appears above it.
    print("<html><head><title>Sorted Ninja Build Times</title>")
    print("<style>", "table, th, td { border:1px solid black; }", "</style>")
    print("</head><body>")
    if msg is not None:
        print("<p>", msg, "</p>")
    print("<table>")
    print(
        "<tr><th>File</th>",
        "<th align='right'>Compile time (ms)</th>",
        # was "<tr>": the header row must be closed with "</tr>"
        "<th align='right'>Size (bytes)</th></tr>",
        sep="",
    )
    for key in sl:
        result = entries[key]
        print(
            "<tr><td>",
            key,
            "</td><td align='right'>",
            result[0],
            "</td><td align='right'>",
            result[1],
            "</td></tr>",
            sep="",
        )
    print("</table></body></html>")


def output_csv(entries, sl):
    # Emit entries as CSV with a header row.
    print("time,size,file")
    for key in sl:
        result = entries[key]
        print(result[0], result[1], key, sep=",")


def main(argv=None):
    args = parse_args(argv)
    entries = load_log(args.log_file)

    # check file could be loaded
    if len(entries) == 0:
        print("Could not parse", args.log_file)
        # sys.exit() (status 0) preserves the original bare exit() behavior;
        # callers in CI treat an unparsable log as a soft failure.
        sys.exit()

    sl = sorted_keys(entries)
    if args.fmt == "xml":
        output_xml(entries, sl)
    elif args.fmt == "html":
        output_html(entries, sl, args.msg)
    else:
        output_csv(entries, sl)


if __name__ == "__main__":
    main()
27 changes: 10 additions & 17 deletions cpp/src/groupby/hash/groupby.cu
Original file line number Diff line number Diff line change
Expand Up @@ -636,23 +636,16 @@ std::unique_ptr<table> groupby(table_view const& keys,
*/
/**
 * @brief Indicates whether the requests can be satisfied with a hash-based
 * groupby implementation.
 *
 * Every request must have a value type with atomic support and use only
 * hash-supported aggregation kinds; any struct-typed values force the
 * sort-based fallback. (The scraped diff interleaved the old two-pass body
 * with this consolidated one; the old `has_struct` also used `all_of` where
 * `any_of` was intended, which this single-pass form makes moot.)
 *
 * @param keys Table of keys (unused here; kept for interface symmetry).
 * @param requests The aggregation requests to be checked.
 * @return true if hash-based groupby can be used for all requests.
 */
bool can_use_hash_groupby(table_view const& keys, host_span<aggregation_request const> requests)
{
  return std::all_of(requests.begin(), requests.end(), [](aggregation_request const& r) {
    // Currently, structs are not supported in any of hash-based aggregations.
    // Therefore, if any request contains structs then we must fallback to sort-based aggregations.
    // TODO: Support structs in hash-based aggregations.
    return not(r.values.type().id() == type_id::STRUCT) and
           cudf::has_atomic_support(r.values.type()) and
           std::all_of(r.aggregations.begin(), r.aggregations.end(), [](auto const& a) {
             return is_hash_aggregation(a->kind);
           });
  });
}

// Hash-based groupby
Expand Down
3 changes: 1 addition & 2 deletions python/cudf/cudf/_lib/column.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -70,8 +70,7 @@ class Column:
def nullable(self) -> bool:
...

@property
def has_nulls(self) -> bool:
def has_nulls(self, include_nan: bool=False) -> bool:
...

@property
Expand Down
3 changes: 1 addition & 2 deletions python/cudf/cudf/_lib/column.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -146,8 +146,7 @@ cdef class Column:
def nullable(self):
return self.base_mask is not None

@property
def has_nulls(self):
def has_nulls(self, include_nan=False):
return self.null_count != 0

@property
Expand Down
2 changes: 1 addition & 1 deletion python/cudf/cudf/core/_base_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ def _clean_nulls_from_index(self):
methods using this method to replace or handle representation
of the actual types correctly.
"""
if self._values.has_nulls:
if self._values.has_nulls():
return cudf.Index(
self._values.astype("str").fillna(cudf._NA_REP), name=self.name
)
Expand Down
Loading

0 comments on commit a775029

Please sign in to comment.