Skip to content

Commit

Permalink
Merge branch 'branch-22.06' into regex-refactor-builtins
Browse files Browse the repository at this point in the history
  • Loading branch information
davidwendt committed May 9, 2022
2 parents 2d91389 + 0fdb6dc commit f4673ed
Show file tree
Hide file tree
Showing 29 changed files with 1,049 additions and 709 deletions.
50 changes: 25 additions & 25 deletions ci/gpu/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ export CONDA_ARTIFACT_PATH="$WORKSPACE/ci/artifacts/cudf/cpu/.conda-bld/"
# Parse git describe
export GIT_DESCRIBE_TAG=`git describe --tags`
export MINOR_VERSION=`echo $GIT_DESCRIBE_TAG | grep -o -E '([0-9]+\.[0-9]+)'`
unset GIT_DESCRIBE_TAG

# Dask & Distributed option to install main(nightly) or `conda-forge` packages.
export INSTALL_DASK_MAIN=1
Expand Down Expand Up @@ -79,30 +80,11 @@ conda info
conda config --show-sources
conda list --show-channel-urls

gpuci_logger "Install dependencies"
gpuci_mamba_retry install -y \
"cudatoolkit=$CUDA_REL" \
"rapids-build-env=$MINOR_VERSION.*" \
"rapids-notebook-env=$MINOR_VERSION.*" \
"dask-cuda=${MINOR_VERSION}" \
"rmm=$MINOR_VERSION.*" \
"ucx-py=${UCX_PY_VERSION}"

# https://docs.rapids.ai/maintainers/depmgmt/
# gpuci_conda_retry remove --force rapids-build-env rapids-notebook-env
# gpuci_mamba_retry install -y "your-pkg=1.0.0"


gpuci_logger "Check compiler versions"
python --version
$CC --version
$CXX --version

gpuci_logger "Check conda environment"
conda info
conda config --show-sources
conda list --show-channel-urls

function install_dask {
# Install the conda-forge or nightly version of dask and distributed
gpuci_logger "Install the conda-forge or nightly version of dask and distributed"
Expand All @@ -125,6 +107,19 @@ function install_dask {

if [[ -z "$PROJECT_FLASH" || "$PROJECT_FLASH" == "0" ]]; then

gpuci_logger "Install dependencies"
gpuci_mamba_retry install -y \
"cudatoolkit=$CUDA_REL" \
"rapids-build-env=$MINOR_VERSION.*" \
"rapids-notebook-env=$MINOR_VERSION.*" \
"dask-cuda=${MINOR_VERSION}" \
"rmm=$MINOR_VERSION.*" \
"ucx-py=${UCX_PY_VERSION}"

# https://docs.rapids.ai/maintainers/depmgmt/
# gpuci_conda_retry remove --force rapids-build-env rapids-notebook-env
# gpuci_mamba_retry install -y "your-pkg=1.0.0"

install_dask

################################################################################
Expand Down Expand Up @@ -171,8 +166,19 @@ else
gpuci_logger "Check GPU usage"
nvidia-smi

gpuci_logger "Installing libcudf, libcudf_kafka and libcudf-tests"
gpuci_mamba_retry install -y -c ${CONDA_ARTIFACT_PATH} libcudf libcudf_kafka libcudf-tests

gpuci_logger "Building cudf, dask-cudf, cudf_kafka and custreamz"
export CONDA_BLD_DIR="$WORKSPACE/.conda-bld"
gpuci_conda_retry build --croot ${CONDA_BLD_DIR} conda/recipes/cudf --python=$PYTHON -c ${CONDA_ARTIFACT_PATH}
gpuci_conda_retry build --croot ${CONDA_BLD_DIR} conda/recipes/dask-cudf --python=$PYTHON -c ${CONDA_ARTIFACT_PATH}
gpuci_conda_retry build --croot ${CONDA_BLD_DIR} conda/recipes/cudf_kafka --python=$PYTHON -c ${CONDA_ARTIFACT_PATH}
gpuci_conda_retry build --croot ${CONDA_BLD_DIR} conda/recipes/custreamz --python=$PYTHON -c ${CONDA_ARTIFACT_PATH}

gpuci_logger "Installing cudf, dask-cudf, cudf_kafka and custreamz"
gpuci_mamba_retry install cudf dask-cudf cudf_kafka custreamz -c "${CONDA_BLD_DIR}" -c "${CONDA_ARTIFACT_PATH}"

gpuci_logger "GoogleTests"
# Run libcudf and libcudf_kafka gtests from libcudf-tests package
for gt in "$CONDA_PREFIX/bin/gtests/libcudf"*/* ; do
Expand Down Expand Up @@ -209,12 +215,6 @@ else
# test-results/*.cs.log are processed in gpuci
fi
fi

install_dask

gpuci_logger "Build python libs from source"
"$WORKSPACE/build.sh" cudf dask_cudf cudf_kafka --ptds

fi

# Both regular and Project Flash proceed here
Expand Down
4 changes: 4 additions & 0 deletions cpp/benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,10 @@ ConfigureBench(PARQUET_WRITER_BENCH io/parquet/parquet_writer.cpp)
# * orc writer benchmark --------------------------------------------------------------------------
ConfigureBench(ORC_WRITER_BENCH io/orc/orc_writer.cpp)

# ##################################################################################################
# * orc writer chunks benchmark ---------------------------------------------------------------
ConfigureNVBench(ORC_WRITER_CHUNKS_NVBENCH io/orc/orc_writer_chunks.cpp)

# ##################################################################################################
# * csv writer benchmark --------------------------------------------------------------------------
ConfigureBench(CSV_WRITER_BENCH io/csv/csv_writer.cpp)
Expand Down
139 changes: 139 additions & 0 deletions cpp/benchmarks/io/orc/orc_writer_chunks.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
/*
* Copyright (c) 2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <benchmarks/common/generate_input.hpp>
#include <benchmarks/fixture/benchmark_fixture.hpp>
#include <benchmarks/io/cuio_common.hpp>
#include <benchmarks/synchronization/synchronization.hpp>

#include <cudf/column/column.hpp>
#include <cudf/io/orc.hpp>
#include <cudf/table/table.hpp>

#include <nvbench/nvbench.cuh>

#include <thrust/iterator/transform_iterator.h>

#include <algorithm>
#include <cstdint>
#include <memory>
#include <numeric>
#include <vector>

// to enable, run cmake with -DBUILD_BENCHMARKS=ON

// Total amount of random input data generated per benchmark run (512 MiB).
constexpr int64_t data_size = 512 << 20;

// Shorthand alias for the cudf I/O namespace used throughout this file.
namespace cudf_io = cudf::io;

// Benchmark a single (non-chunked) ORC write of one randomly generated table.
// Reports peak memory usage, encoded file size, and total row count alongside
// the timing collected by nvbench.
void nvbench_orc_write(nvbench::state& state)
{
  auto const n_columns = static_cast<cudf::size_type>(state.get_int64("num_columns"));

  // Mix of all type groups exercised by the writer, cycled across the columns.
  auto const dtypes = get_type_or_group({int32_t(type_group_id::INTEGRAL_SIGNED),
                                         int32_t(type_group_id::FLOATING_POINT),
                                         int32_t(type_group_id::FIXED_POINT),
                                         int32_t(type_group_id::TIMESTAMP),
                                         int32_t(cudf::type_id::STRING),
                                         int32_t(cudf::type_id::STRUCT),
                                         int32_t(cudf::type_id::LIST)});

  auto const input = create_random_table(cycle_dtypes(dtypes, n_columns),  //
                                         table_size_bytes{data_size});
  cudf::table_view const view = input->view();

  auto mem_stats_logger = cudf::memory_stats_logger();

  state.add_global_memory_reads<int64_t>(data_size);
  state.add_element_count(view.num_columns() * view.num_rows());

  size_t encoded_file_size = 0;

  state.exec(nvbench::exec_tag::timer | nvbench::exec_tag::sync,
             [&](nvbench::launch& launch, auto& timer) {
               // Sink is recreated per iteration; only the write itself is timed.
               cuio_source_sink_pair source_sink(io_type::VOID);
               timer.start();

               cudf_io::orc_writer_options const opts =
                 cudf_io::orc_writer_options::builder(source_sink.make_sink_info(), view);
               cudf_io::write_orc(opts);

               timer.stop();
               encoded_file_size = source_sink.size();
             });

  state.add_buffer_size(mem_stats_logger.peak_memory_usage(), "pmu", "Peak Memory Usage");
  state.add_buffer_size(encoded_file_size, "efs", "Encoded File Size");
  state.add_buffer_size(view.num_rows(), "trc", "Total Rows");
}

// Benchmark the chunked ORC writer: the input is split into `num_chunks`
// tables that are written sequentially into a single ORC file via
// cudf::io::orc_chunked_writer. Reports peak memory usage, encoded file size,
// and total row count alongside the timing collected by nvbench.
void nvbench_orc_chunked_write(nvbench::state& state)
{
  cudf::size_type num_cols   = state.get_int64("num_columns");
  cudf::size_type num_tables = state.get_int64("num_chunks");

  // Generate `num_tables` random tables that together hold ~data_size bytes.
  std::vector<std::unique_ptr<cudf::table>> tables;
  tables.reserve(num_tables);
  for (cudf::size_type idx = 0; idx < num_tables; idx++) {
    tables.push_back(
      create_random_table(cycle_dtypes(get_type_or_group({int32_t(type_group_id::INTEGRAL_SIGNED),
                                                          int32_t(type_group_id::FLOATING_POINT),
                                                          int32_t(type_group_id::FIXED_POINT),
                                                          int32_t(type_group_id::TIMESTAMP),
                                                          int32_t(cudf::type_id::STRING),
                                                          int32_t(cudf::type_id::STRUCT),
                                                          int32_t(cudf::type_id::LIST)}),
                                       num_cols),
                          table_size_bytes{size_t(data_size / num_tables)}));
  }

  auto mem_stats_logger = cudf::memory_stats_logger();

  // Accumulate counts in 64 bits: element totals across all chunks can exceed
  // INT_MAX for wide/long tables, and std::accumulate with an `int` initial
  // value would perform the sum (and the per-table column*row product) in
  // `int`, risking signed overflow (UB).
  auto size_iter = thrust::make_transform_iterator(tables.begin(), [](auto const& i) {
    return static_cast<int64_t>(i->num_columns()) * i->num_rows();
  });
  auto row_count_iter = thrust::make_transform_iterator(
    tables.begin(), [](auto const& i) { return static_cast<int64_t>(i->num_rows()); });
  auto total_elements = std::accumulate(size_iter, size_iter + num_tables, int64_t{0});
  auto total_rows     = std::accumulate(row_count_iter, row_count_iter + num_tables, int64_t{0});

  state.add_global_memory_reads<int64_t>(data_size);
  state.add_element_count(total_elements);

  size_t encoded_file_size = 0;

  state.exec(
    nvbench::exec_tag::timer | nvbench::exec_tag::sync, [&](nvbench::launch& launch, auto& timer) {
      // Sink is recreated per iteration; only the chunked write loop is timed.
      cuio_source_sink_pair source_sink(io_type::VOID);
      timer.start();

      cudf_io::chunked_orc_writer_options opts =
        cudf_io::chunked_orc_writer_options::builder(source_sink.make_sink_info());
      cudf_io::orc_chunked_writer writer(opts);
      std::for_each(tables.begin(),
                    tables.end(),
                    [&writer](std::unique_ptr<cudf::table> const& tbl) { writer.write(*tbl); });
      writer.close();

      timer.stop();
      encoded_file_size = source_sink.size();
    });

  state.add_buffer_size(mem_stats_logger.peak_memory_usage(), "pmu", "Peak Memory Usage");
  state.add_buffer_size(encoded_file_size, "efs", "Encoded File Size");
  state.add_buffer_size(total_rows, "trc", "Total Rows");
}

// Register the non-chunked writer benchmark; sweeps the column count only.
NVBENCH_BENCH(nvbench_orc_write)
.set_name("orc_write")
.set_min_samples(4)
.add_int64_axis("num_columns", {8, 64});

// Register the chunked writer benchmark; sweeps column count and chunk count.
NVBENCH_BENCH(nvbench_orc_chunked_write)
.set_name("orc_chunked_write")
.set_min_samples(4)
.add_int64_axis("num_columns", {8, 64})
.add_int64_axis("num_chunks", {8, 64});
Loading

0 comments on commit f4673ed

Please sign in to comment.