Skip to content

Commit

Permalink
Merge branch 'branch-22.04' of github.com:rapidsai/cudf into improvem…
Browse files Browse the repository at this point in the history
…ent/rewrite_sample
  • Loading branch information
isVoid committed Feb 24, 2022
2 parents f3b05ec + aa746ae commit b08a9b5
Show file tree
Hide file tree
Showing 137 changed files with 2,779 additions and 1,372 deletions.
3 changes: 1 addition & 2 deletions build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ ARGS=$*
REPODIR=$(cd $(dirname $0); pwd)

VALIDARGS="clean libcudf cudf dask_cudf benchmarks tests libcudf_kafka cudf_kafka custreamz -v -g -n -l --allgpuarch --disable_nvtx --show_depr_warn --ptds -h --build_metrics --incl_cache_stats"
HELP="$0 [clean] [libcudf] [cudf] [dask_cudf] [benchmarks] [tests] [libcudf_kafka] [cudf_kafka] [custreamz] [-v] [-g] [-n] [-h] [-l] [--cmake-args=\\\"<args>\\\"]
HELP="$0 [clean] [libcudf] [cudf] [dask_cudf] [benchmarks] [tests] [libcudf_kafka] [cudf_kafka] [custreamz] [-v] [-g] [-n] [-h] [--cmake-args=\\\"<args>\\\"]
clean - remove all existing build artifacts and configuration (start
over)
libcudf - build the cudf C++ code only
Expand All @@ -32,7 +32,6 @@ HELP="$0 [clean] [libcudf] [cudf] [dask_cudf] [benchmarks] [tests] [libcudf_kafk
-v - verbose build mode
-g - build for debug
-n - no install step
-l - build legacy tests
--allgpuarch - build for all supported GPU architectures
--disable_nvtx - disable inserting NVTX profiling ranges
--show_depr_warn - show cmake deprecation warnings
Expand Down
8 changes: 2 additions & 6 deletions ci/benchmark/build.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/bin/bash
# Copyright (c) 2020, NVIDIA CORPORATION.
# Copyright (c) 2020-2022, NVIDIA CORPORATION.
#########################################
# cuDF GPU build and test script for CI #
#########################################
Expand Down Expand Up @@ -98,11 +98,7 @@ conda list --show-channel-urls
################################################################################

logger "Build libcudf..."
if [[ "${BUILD_MODE}" == "pull-request" ]]; then
"$WORKSPACE/build.sh" clean libcudf cudf dask_cudf benchmarks tests --ptds
else
"$WORKSPACE/build.sh" clean libcudf cudf dask_cudf benchmarks tests -l --ptds
fi
"$WORKSPACE/build.sh" clean libcudf cudf dask_cudf benchmarks tests --ptds

################################################################################
# BENCHMARK - Run and parse libcudf and cuDF benchmarks
Expand Down
20 changes: 7 additions & 13 deletions ci/cpu/upload.sh
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#!/bin/bash
# Copyright (c) 2020-2022, NVIDIA CORPORATION.
#
# Adopted from https://github.com/tmcdonell/travis-scripts/blob/dfaac280ac2082cd6bcaba3217428347899f2975/update-accelerate-buildbot.sh

Expand All @@ -23,56 +24,49 @@ if [ -z "$MY_UPLOAD_KEY" ]; then
return 0
fi

################################################################################
# SETUP - Get conda file output locations
################################################################################

gpuci_logger "Get conda file output locations"

export LIBCUDF_FILE=`conda build --no-build-id --croot "$WORKSPACE/.conda-bld" conda/recipes/libcudf --output`
export LIBCUDF_KAFKA_FILE=`conda build --no-build-id --croot "$WORKSPACE/.conda-bld" conda/recipes/libcudf_kafka --output`
export CUDF_FILE=`conda build --croot ${CONDA_BLD_DIR} conda/recipes/cudf --python=$PYTHON --output`
export DASK_CUDF_FILE=`conda build --croot ${CONDA_BLD_DIR} conda/recipes/dask-cudf --python=$PYTHON --output`
export CUDF_KAFKA_FILE=`conda build --croot ${CONDA_BLD_DIR} conda/recipes/cudf_kafka --python=$PYTHON --output`
export CUSTREAMZ_FILE=`conda build --croot ${CONDA_BLD_DIR} conda/recipes/custreamz --python=$PYTHON --output`

################################################################################
# UPLOAD - Conda packages
################################################################################

gpuci_logger "Starting conda uploads"
if [[ "$BUILD_LIBCUDF" == "1" && "$UPLOAD_LIBCUDF" == "1" ]]; then
export LIBCUDF_FILE=$(conda build --no-build-id --croot "${CONDA_BLD_DIR}" conda/recipes/libcudf --output)
test -e ${LIBCUDF_FILE}
echo "Upload libcudf"
echo ${LIBCUDF_FILE}
gpuci_retry anaconda -t ${MY_UPLOAD_KEY} upload -u ${CONDA_USERNAME:-rapidsai} ${LABEL_OPTION} --skip-existing ${LIBCUDF_FILE} --no-progress
fi

if [[ "$BUILD_CUDF" == "1" && "$UPLOAD_CUDF" == "1" ]]; then
export CUDF_FILE=$(conda build --croot "${CONDA_BLD_DIR}" conda/recipes/cudf --python=$PYTHON --output)
test -e ${CUDF_FILE}
echo "Upload cudf"
echo ${CUDF_FILE}
gpuci_retry anaconda -t ${MY_UPLOAD_KEY} upload -u ${CONDA_USERNAME:-rapidsai} ${LABEL_OPTION} --skip-existing ${CUDF_FILE} --no-progress

export DASK_CUDF_FILE=$(conda build --croot "${CONDA_BLD_DIR}" conda/recipes/dask-cudf --python=$PYTHON --output)
test -e ${DASK_CUDF_FILE}
echo "Upload dask-cudf"
echo ${DASK_CUDF_FILE}
gpuci_retry anaconda -t ${MY_UPLOAD_KEY} upload -u ${CONDA_USERNAME:-rapidsai} ${LABEL_OPTION} --skip-existing ${DASK_CUDF_FILE} --no-progress

export CUSTREAMZ_FILE=$(conda build --croot "${CONDA_BLD_DIR}" conda/recipes/custreamz --python=$PYTHON --output)
test -e ${CUSTREAMZ_FILE}
echo "Upload custreamz"
echo ${CUSTREAMZ_FILE}
gpuci_retry anaconda -t ${MY_UPLOAD_KEY} upload -u ${CONDA_USERNAME:-rapidsai} ${LABEL_OPTION} --skip-existing ${CUSTREAMZ_FILE} --no-progress
fi

if [[ "$BUILD_LIBCUDF" == "1" && "$UPLOAD_LIBCUDF_KAFKA" == "1" ]]; then
export LIBCUDF_KAFKA_FILE=$(conda build --no-build-id --croot "${CONDA_BLD_DIR}" conda/recipes/libcudf_kafka --output)
test -e ${LIBCUDF_KAFKA_FILE}
echo "Upload libcudf_kafka"
echo ${LIBCUDF_KAFKA_FILE}
gpuci_retry anaconda -t ${MY_UPLOAD_KEY} upload -u ${CONDA_USERNAME:-rapidsai} ${LABEL_OPTION} --skip-existing ${LIBCUDF_KAFKA_FILE} --no-progress
fi

if [[ "$BUILD_CUDF" == "1" && "$UPLOAD_CUDF_KAFKA" == "1" ]]; then
export CUDF_KAFKA_FILE=$(conda build --croot "${CONDA_BLD_DIR}" conda/recipes/cudf_kafka --python=$PYTHON --output)
test -e ${CUDF_KAFKA_FILE}
echo "Upload cudf_kafka"
echo ${CUDF_KAFKA_FILE}
Expand Down
20 changes: 6 additions & 14 deletions ci/gpu/build.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/bin/bash
# Copyright (c) 2018-2021, NVIDIA CORPORATION.
# Copyright (c) 2018-2022, NVIDIA CORPORATION.
##############################################
# cuDF GPU build and test script for CI #
##############################################
Expand Down Expand Up @@ -128,11 +128,7 @@ if [[ -z "$PROJECT_FLASH" || "$PROJECT_FLASH" == "0" ]]; then
################################################################################

gpuci_logger "Build from source"
if [[ "${BUILD_MODE}" == "pull-request" ]]; then
"$WORKSPACE/build.sh" clean libcudf cudf dask_cudf libcudf_kafka cudf_kafka benchmarks tests --ptds
else
"$WORKSPACE/build.sh" clean libcudf cudf dask_cudf libcudf_kafka cudf_kafka benchmarks tests -l --ptds
fi
"$WORKSPACE/build.sh" clean libcudf cudf dask_cudf libcudf_kafka cudf_kafka benchmarks tests --ptds

################################################################################
# TEST - Run GoogleTest
Expand Down Expand Up @@ -226,11 +222,7 @@ else
install_dask

gpuci_logger "Build python libs from source"
if [[ "${BUILD_MODE}" == "pull-request" ]]; then
"$WORKSPACE/build.sh" cudf dask_cudf cudf_kafka --ptds
else
"$WORKSPACE/build.sh" cudf dask_cudf cudf_kafka -l --ptds
fi
"$WORKSPACE/build.sh" cudf dask_cudf cudf_kafka --ptds

fi

Expand All @@ -249,15 +241,15 @@ fi

cd "$WORKSPACE/python/cudf"
gpuci_logger "Python py.test for cuDF"
py.test -n 8 --cache-clear --basetemp="$WORKSPACE/cudf-cuda-tmp" --ignore="$WORKSPACE/python/cudf/cudf/benchmarks" --junitxml="$WORKSPACE/junit-cudf.xml" -v --cov-config=.coveragerc --cov=cudf --cov-report=xml:"$WORKSPACE/python/cudf/cudf-coverage.xml" --cov-report term --dist=loadscope
py.test -n 8 --cache-clear --basetemp="$WORKSPACE/cudf-cuda-tmp" --ignore="$WORKSPACE/python/cudf/cudf/benchmarks" --junitxml="$WORKSPACE/junit-cudf.xml" -v --cov-config=.coveragerc --cov=cudf --cov-report=xml:"$WORKSPACE/python/cudf/cudf-coverage.xml" --cov-report term --dist=loadscope cudf

cd "$WORKSPACE/python/dask_cudf"
gpuci_logger "Python py.test for dask-cudf"
py.test -n 8 --cache-clear --basetemp="$WORKSPACE/dask-cudf-cuda-tmp" --junitxml="$WORKSPACE/junit-dask-cudf.xml" -v --cov-config=.coveragerc --cov=dask_cudf --cov-report=xml:"$WORKSPACE/python/dask_cudf/dask-cudf-coverage.xml" --cov-report term
py.test -n 8 --cache-clear --basetemp="$WORKSPACE/dask-cudf-cuda-tmp" --junitxml="$WORKSPACE/junit-dask-cudf.xml" -v --cov-config=.coveragerc --cov=dask_cudf --cov-report=xml:"$WORKSPACE/python/dask_cudf/dask-cudf-coverage.xml" --cov-report term dask_cudf

cd "$WORKSPACE/python/custreamz"
gpuci_logger "Python py.test for cuStreamz"
py.test -n 8 --cache-clear --basetemp="$WORKSPACE/custreamz-cuda-tmp" --junitxml="$WORKSPACE/junit-custreamz.xml" -v --cov-config=.coveragerc --cov=custreamz --cov-report=xml:"$WORKSPACE/python/custreamz/custreamz-coverage.xml" --cov-report term
py.test -n 8 --cache-clear --basetemp="$WORKSPACE/custreamz-cuda-tmp" --junitxml="$WORKSPACE/junit-custreamz.xml" -v --cov-config=.coveragerc --cov=custreamz --cov-report=xml:"$WORKSPACE/python/custreamz/custreamz-coverage.xml" --cov-report term custreamz

gpuci_logger "Test notebooks"
"$WORKSPACE/ci/gpu/test-notebooks.sh" 2>&1 | tee nbtest.log
Expand Down
2 changes: 0 additions & 2 deletions conda/recipes/libcudf_kafka/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,6 @@ requirements:
host:
- libcudf {{version}}
- librdkafka >=1.7.0,<1.8.0a0
run:
- {{ pin_compatible('librdkafka', max_pin='x.x') }} #TODO: librdkafka should be automatically included here by run_exports but is not

test:
commands:
Expand Down
2 changes: 2 additions & 0 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -498,6 +498,8 @@ set_target_properties(
# set target compile options
CXX_STANDARD 17
CXX_STANDARD_REQUIRED ON
# For std:: support of __int128_t. Can be removed once using cuda::std
CXX_EXTENSIONS ON
CUDA_STANDARD 17
CUDA_STANDARD_REQUIRED ON
POSITION_INDEPENDENT_CODE ON
Expand Down
8 changes: 7 additions & 1 deletion cpp/benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,12 @@ function(ConfigureBench CMAKE_BENCH_NAME)
${CMAKE_BENCH_NAME}
PROPERTIES RUNTIME_OUTPUT_DIRECTORY "$<BUILD_INTERFACE:${CUDF_BINARY_DIR}/benchmarks>"
INSTALL_RPATH "\$ORIGIN/../../../lib"
CXX_STANDARD 17
CXX_STANDARD_REQUIRED ON
# For std:: support of __int128_t. Can be removed once using cuda::std
CXX_EXTENSIONS ON
CUDA_STANDARD 17
CUDA_STANDARD_REQUIRED ON
)
target_link_libraries(
${CMAKE_BENCH_NAME} PRIVATE cudf_benchmark_common cudf_datagen benchmark::benchmark_main
Expand Down Expand Up @@ -254,7 +260,7 @@ ConfigureBench(
string/convert_durations.cpp
string/convert_fixed_point.cpp
string/convert_numerics.cpp
string/copy.cpp
string/copy.cu
string/extract.cpp
string/factory.cu
string/filter.cpp
Expand Down
15 changes: 7 additions & 8 deletions cpp/benchmarks/common/generate_input.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020, NVIDIA CORPORATION.
* Copyright (c) 2020-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -69,15 +69,14 @@ distribution_id default_distribution_id()
return distribution_id::GEOMETRIC;
}

template <typename T,
std::enable_if_t<!std::is_unsigned<T>::value && cudf::is_numeric<T>()>* = nullptr>
template <typename T, std::enable_if_t<!std::is_unsigned_v<T> && cudf::is_numeric<T>()>* = nullptr>
distribution_id default_distribution_id()
{
return distribution_id::NORMAL;
}

template <typename T,
std::enable_if_t<!std::is_same_v<T, bool> && std::is_unsigned<T>::value &&
std::enable_if_t<!std::is_same_v<T, bool> && std::is_unsigned_v<T> &&
cudf::is_numeric<T>()>* = nullptr>
distribution_id default_distribution_id()
{
Expand Down Expand Up @@ -226,7 +225,7 @@ class data_profile {
public:
template <
typename T,
typename std::enable_if_t<!std::is_same_v<T, bool> && std::is_integral<T>::value, T>* = nullptr>
typename std::enable_if_t<!std::is_same_v<T, bool> && std::is_integral_v<T>, T>* = nullptr>
distribution_params<T> get_distribution_params() const
{
auto it = int_params.find(cudf::type_to_id<T>());
Expand All @@ -239,7 +238,7 @@ class data_profile {
}
}

template <typename T, typename std::enable_if_t<std::is_floating_point<T>::value, T>* = nullptr>
template <typename T, typename std::enable_if_t<std::is_floating_point_v<T>, T>* = nullptr>
distribution_params<T> get_distribution_params() const
{
auto it = float_params.find(cudf::type_to_id<T>());
Expand Down Expand Up @@ -307,7 +306,7 @@ class data_profile {
// discrete distributions (integers, strings, lists). Otherwise the call with have no effect.
template <typename T,
typename Type_enum,
typename std::enable_if_t<std::is_integral<T>::value, T>* = nullptr>
typename std::enable_if_t<std::is_integral_v<T>, T>* = nullptr>
void set_distribution_params(Type_enum type_or_group,
distribution_id dist,
T lower_bound,
Expand All @@ -331,7 +330,7 @@ class data_profile {
// have continuous distributions (floating point types). Otherwise the call with have no effect.
template <typename T,
typename Type_enum,
typename std::enable_if_t<std::is_floating_point<T>::value, T>* = nullptr>
typename std::enable_if_t<std::is_floating_point_v<T>, T>* = nullptr>
void set_distribution_params(Type_enum type_or_group,
distribution_id dist,
T lower_bound,
Expand Down
10 changes: 5 additions & 5 deletions cpp/benchmarks/common/random_distribution_factory.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020, NVIDIA CORPORATION.
* Copyright (c) 2020-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -24,7 +24,7 @@
/**
* @brief Generates a normal(binomial) distribution between zero and upper_bound.
*/
template <typename T, typename std::enable_if_t<std::is_integral<T>::value, T>* = nullptr>
template <typename T, typename std::enable_if_t<std::is_integral_v<T>, T>* = nullptr>
auto make_normal_dist(T upper_bound)
{
using uT = typename std::make_unsigned<T>::type;
Expand All @@ -42,7 +42,7 @@ auto make_normal_dist(T upper_bound)
return std::normal_distribution<T>(mean, stddev);
}

template <typename T, typename std::enable_if_t<std::is_integral<T>::value, T>* = nullptr>
template <typename T, typename std::enable_if_t<std::is_integral_v<T>, T>* = nullptr>
auto make_uniform_dist(T range_start, T range_end)
{
return std::uniform_int_distribution<T>(range_start, range_end);
Expand All @@ -62,7 +62,7 @@ double geometric_dist_p(T range_size)
return p ? p : std::numeric_limits<double>::epsilon();
}

template <typename T, typename std::enable_if_t<std::is_integral<T>::value, T>* = nullptr>
template <typename T, typename std::enable_if_t<std::is_integral_v<T>, T>* = nullptr>
auto make_geometric_dist(T range_start, T range_end)
{
using uT = typename std::make_unsigned<T>::type;
Expand All @@ -82,7 +82,7 @@ auto make_geometric_dist(T range_start, T range_end)
template <typename T>
using distribution_fn = std::function<T(std::mt19937&)>;

template <typename T, typename std::enable_if_t<std::is_integral<T>::value, T>* = nullptr>
template <typename T, typename std::enable_if_t<std::is_integral_v<T>, T>* = nullptr>
distribution_fn<T> make_distribution(distribution_id did, T lower_bound, T upper_bound)
{
switch (did) {
Expand Down
4 changes: 3 additions & 1 deletion cpp/benchmarks/io/text/multibyte_split.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
* Copyright (c) 2021-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -135,12 +135,14 @@ static void BM_multibyte_split(benchmark::State& state)
default: CUDF_FAIL();
}

auto mem_stats_logger = cudf::memory_stats_logger();
for (auto _ : state) {
cuda_event_timer raii(state, true);
auto output = cudf::io::text::multibyte_split(*source, delim);
}

state.SetBytesProcessed(state.iterations() * device_input.size());
state.counters["peak_memory_usage"] = mem_stats_logger.peak_memory_usage();
}

class MultibyteSplitBenchmark : public cudf::benchmark {
Expand Down
Loading

0 comments on commit b08a9b5

Please sign in to comment.