Skip to content

Commit

Permalink
Merge branch 'branch-22.06' of https://github.com/rapidsai/cudf into …
Browse files Browse the repository at this point in the history
…feature/diff-non-numeric
  • Loading branch information
Matt711 committed Apr 14, 2022
2 parents ca12c80 + ac27757 commit 1eee122
Show file tree
Hide file tree
Showing 79 changed files with 3,605 additions and 2,850 deletions.
81 changes: 73 additions & 8 deletions build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,13 @@ ARGS=$*
# script, and that this script resides in the repo dir!
REPODIR=$(cd $(dirname $0); pwd)

VALIDARGS="clean libcudf cudf dask_cudf benchmarks tests libcudf_kafka cudf_kafka custreamz -v -g -n -l --allgpuarch --disable_nvtx --show_depr_warn --ptds -h --build_metrics --incl_cache_stats"
HELP="$0 [clean] [libcudf] [cudf] [dask_cudf] [benchmarks] [tests] [libcudf_kafka] [cudf_kafka] [custreamz] [-v] [-g] [-n] [-h] [--cmake-args=\\\"<args>\\\"]
VALIDARGS="clean libcudf cudf cudfjar dask_cudf benchmarks tests libcudf_kafka cudf_kafka custreamz -v -g -n -l --allgpuarch --disable_nvtx --show_depr_warn --ptds -h --build_metrics --incl_cache_stats"
HELP="$0 [clean] [libcudf] [cudf] [cudfjar] [dask_cudf] [benchmarks] [tests] [libcudf_kafka] [cudf_kafka] [custreamz] [-v] [-g] [-n] [-h] [--cmake-args=\\\"<args>\\\"]
clean - remove all existing build artifacts and configuration (start
over)
libcudf - build the cudf C++ code only
cudf - build the cudf Python package
cudfjar - build cudf JAR with static libcudf using devtoolset toolchain
dask_cudf - build the dask_cudf Python package
benchmarks - build benchmarks
tests - build tests
Expand Down Expand Up @@ -50,7 +51,9 @@ CUDF_KAFKA_BUILD_DIR=${REPODIR}/python/cudf_kafka/build
CUDF_BUILD_DIR=${REPODIR}/python/cudf/build
DASK_CUDF_BUILD_DIR=${REPODIR}/python/dask_cudf/build
CUSTREAMZ_BUILD_DIR=${REPODIR}/python/custreamz/build
BUILD_DIRS="${LIB_BUILD_DIR} ${CUDF_BUILD_DIR} ${DASK_CUDF_BUILD_DIR} ${KAFKA_LIB_BUILD_DIR} ${CUDF_KAFKA_BUILD_DIR} ${CUSTREAMZ_BUILD_DIR}"
CUDF_JAR_JAVA_BUILD_DIR="$REPODIR/java/target"

BUILD_DIRS="${LIB_BUILD_DIR} ${CUDF_BUILD_DIR} ${DASK_CUDF_BUILD_DIR} ${KAFKA_LIB_BUILD_DIR} ${CUDF_KAFKA_BUILD_DIR} ${CUSTREAMZ_BUILD_DIR} ${CUDF_JAR_JAVA_BUILD_DIR}"

# Set defaults for vars modified by flags to this script
VERBOSE_FLAG=""
Expand Down Expand Up @@ -101,6 +104,58 @@ function buildAll {
((${NUMARGS} == 0 )) || !(echo " ${ARGS} " | grep -q " [^-]\+ ")
}

function buildLibCudfJniInDocker {
local cudaVersion="11.5.0"
local imageName="cudf-build:${cudaVersion}-devel-centos7"
local CMAKE_GENERATOR="${CMAKE_GENERATOR:-Ninja}"
local workspaceDir="/rapids"
local localMavenRepo=${LOCAL_MAVEN_REPO:-"$HOME/.m2/repository"}
local workspaceRepoDir="$workspaceDir/cudf"
local workspaceMavenRepoDir="$workspaceDir/.m2/repository"
mkdir -p "$CUDF_JAR_JAVA_BUILD_DIR/libcudf-cmake-build"
nvidia-docker build \
-f java/ci/Dockerfile.centos7 \
--build-arg CUDA_VERSION=${cudaVersion} \
-t $imageName .
nvidia-docker run -it -u $(id -u):$(id -g) --rm \
-v "/etc/group:/etc/group:ro" \
-v "/etc/passwd:/etc/passwd:ro" \
-v "/etc/shadow:/etc/shadow:ro" \
-v "/etc/sudoers.d:/etc/sudoers.d:ro" \
-v "$REPODIR:$workspaceRepoDir:rw" \
-v "$localMavenRepo:$workspaceMavenRepoDir:rw" \
--workdir "$workspaceRepoDir/java/target/libcudf-cmake-build" \
${imageName} \
scl enable devtoolset-9 \
"cmake $workspaceRepoDir/cpp \
-G${CMAKE_GENERATOR} \
-DCMAKE_BUILD_TYPE=${BUILD_TYPE} \
-DCUDA_STATIC_RUNTIME=ON \
-DCMAKE_CUDA_ARCHITECTURES=${CUDF_CMAKE_CUDA_ARCHITECTURES} \
-DCMAKE_INSTALL_PREFIX==/usr/local/rapids \
-DUSE_NVTX=ON -DCUDF_USE_ARROW_STATIC=ON \
-DCUDF_ENABLE_ARROW_S3=OFF \
-DBUILD_TESTS=OFF \
-DPER_THREAD_DEFAULT_STREAM=ON \
-DRMM_LOGGING_LEVEL=OFF \
-DBUILD_SHARED_LIBS=OFF && \
cmake --build . --parallel ${PARALLEL_LEVEL} && \
cd $workspaceRepoDir/java && \
mvn ${MVN_PHASES:-"package"} \
-Dmaven.repo.local=$workspaceMavenRepoDir \
-DskipTests=${SKIP_TESTS:-false} \
-Dparallel.level=${PARALLEL_LEVEL} \
-DCUDF_CPP_BUILD_DIR=$workspaceRepoDir/java/target/libcudf-cmake-build \
-DCUDA_STATIC_RUNTIME=ON \
-DPER_THREAD_DEFAULT_STREAM=ON \
-DRMM_LOGGING_LEVEL=OFF \
-DUSE_GDS=ON \
-DGPU_ARCHS=${CUDF_CMAKE_CUDA_ARCHITECTURES} \
-DCUDF_JNI_ARROW_STATIC=ON \
-DCUDF_JNI_LIBCUDF_STATIC=ON \
-Dtest=*,!CuFileTest"
}

if hasArg -h || hasArg --h || hasArg --help; then
echo "${HELP}"
exit 0
Expand Down Expand Up @@ -178,15 +233,21 @@ fi
################################################################################
# Configure, build, and install libcudf

if buildAll || hasArg libcudf; then
if buildAll || hasArg libcudf || hasArg cudfjar; then
if (( ${BUILD_ALL_GPU_ARCH} == 0 )); then
CUDF_CMAKE_CUDA_ARCHITECTURES="-DCMAKE_CUDA_ARCHITECTURES=NATIVE"
echo "Building for the architecture of the GPU in the system..."
CUDF_CMAKE_CUDA_ARCHITECTURES="${CUDF_CMAKE_CUDA_ARCHITECTURES:-NATIVE}"
if [[ "$CUDF_CMAKE_CUDA_ARCHITECTURES" == "NATIVE" ]]; then
echo "Building for the architecture of the GPU in the system..."
else
echo "Building for the GPU architecture(s) $CUDF_CMAKE_CUDA_ARCHITECTURES ..."
fi
else
CUDF_CMAKE_CUDA_ARCHITECTURES="-DCMAKE_CUDA_ARCHITECTURES=ALL"
CUDF_CMAKE_CUDA_ARCHITECTURES="ALL"
echo "Building for *ALL* supported GPU architectures..."
fi
fi

if buildAll || hasArg libcudf; then
# get the current count before the compile starts
if [[ "$BUILD_REPORT_INCL_CACHE_STATS" == "ON" && -x "$(command -v sccache)" ]]; then
# zero the sccache statistics
Expand All @@ -195,7 +256,7 @@ if buildAll || hasArg libcudf; then

cmake -S $REPODIR/cpp -B ${LIB_BUILD_DIR} \
-DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} \
${CUDF_CMAKE_CUDA_ARCHITECTURES} \
-DCMAKE_CUDA_ARCHITECTURES=${CUDF_CMAKE_CUDA_ARCHITECTURES} \
-DUSE_NVTX=${BUILD_NVTX} \
-DBUILD_TESTS=${BUILD_TESTS} \
-DBUILD_BENCHMARKS=${BUILD_BENCHMARKS} \
Expand Down Expand Up @@ -262,6 +323,10 @@ if buildAll || hasArg dask_cudf; then
fi
fi

if hasArg cudfjar; then
buildLibCudfJniInDocker
fi

# Build libcudf_kafka library
if hasArg libcudf_kafka; then
cmake -S $REPODIR/cpp/libcudf_kafka -B ${KAFKA_LIB_BUILD_DIR} \
Expand Down
6 changes: 3 additions & 3 deletions ci/benchmark/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ export GBENCH_BENCHMARKS_DIR="$WORKSPACE/cpp/build/gbenchmarks/"
export LIBCUDF_KERNEL_CACHE_PATH="$HOME/.jitify-cache"

# Dask & Distributed option to install main(nightly) or `conda-forge` packages.
export INSTALL_DASK_MAIN=0
export INSTALL_DASK_MAIN=1

function remove_libcudf_kernel_cache_dir {
EXITCODE=$?
Expand Down Expand Up @@ -82,8 +82,8 @@ if [[ "${INSTALL_DASK_MAIN}" == 1 ]]; then
gpuci_logger "gpuci_mamba_retry update dask"
gpuci_mamba_retry update dask
else
gpuci_logger "gpuci_mamba_retry install conda-forge::dask==2022.03.0 conda-forge::distributed==2022.03.0 conda-forge::dask-core==2022.03.0 --force-reinstall"
gpuci_mamba_retry install conda-forge::dask==2022.03.0 conda-forge::distributed==2022.03.0 conda-forge::dask-core==2022.03.0 --force-reinstall
gpuci_logger "gpuci_mamba_retry install conda-forge::dask>=2022.03.0 conda-forge::distributed>=2022.03.0 conda-forge::dask-core>=2022.03.0 --force-reinstall"
gpuci_mamba_retry install conda-forge::dask>=2022.03.0 conda-forge::distributed>=2022.03.0 conda-forge::dask-core>=2022.03.0 --force-reinstall
fi

# Install the master version of streamz
Expand Down
6 changes: 3 additions & 3 deletions ci/gpu/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ export GIT_DESCRIBE_TAG=`git describe --tags`
export MINOR_VERSION=`echo $GIT_DESCRIBE_TAG | grep -o -E '([0-9]+\.[0-9]+)'`

# Dask & Distributed option to install main(nightly) or `conda-forge` packages.
export INSTALL_DASK_MAIN=0
export INSTALL_DASK_MAIN=1

# ucx-py version
export UCX_PY_VERSION='0.26.*'
Expand Down Expand Up @@ -112,8 +112,8 @@ function install_dask {
gpuci_mamba_retry update dask
conda list
else
gpuci_logger "gpuci_mamba_retry install conda-forge::dask==2022.03.0 conda-forge::distributed==2022.03.0 conda-forge::dask-core==2022.03.0 --force-reinstall"
gpuci_mamba_retry install conda-forge::dask==2022.03.0 conda-forge::distributed==2022.03.0 conda-forge::dask-core==2022.03.0 --force-reinstall
gpuci_logger "gpuci_mamba_retry install conda-forge::dask>=2022.03.0 conda-forge::distributed>=2022.03.0 conda-forge::dask-core>=2022.03.0 --force-reinstall"
gpuci_mamba_retry install conda-forge::dask>=2022.03.0 conda-forge::distributed>=2022.03.0 conda-forge::dask-core>=2022.03.0 --force-reinstall
fi
# Install the main version of streamz
gpuci_logger "Install the main version of streamz"
Expand Down
6 changes: 3 additions & 3 deletions conda/environments/cudf_dev_cuda11.5.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ dependencies:
- clang-tools=11.1.0
- cupy>=9.5.0,<11.0.0a0
- rmm=22.06.*
- cmake>=3.20.1,<3.23
- cmake>=3.20.1,!=3.23.0
- cmake_setuptools>=0.1.3
- python>=3.7,<3.9
- numba>=0.54
Expand Down Expand Up @@ -43,8 +43,8 @@ dependencies:
- pydocstyle=6.1.1
- typing_extensions
- pre-commit
- dask==2022.03.0
- distributed==2022.03.0
- dask>=2022.03.0
- distributed>=2022.03.0
- streamz
- arrow-cpp=7.0.0
- dlpack>=0.5,<0.6.0a0
Expand Down
2 changes: 1 addition & 1 deletion conda/recipes/cudf_kafka/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ build:

requirements:
build:
- cmake >=3.20.1,<3.23
- cmake >=3.20.1,!=3.23.0
host:
- python
- cython >=0.29,<0.30
Expand Down
4 changes: 2 additions & 2 deletions conda/recipes/custreamz/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,8 @@ requirements:
- python
- streamz
- cudf {{ version }}
- dask==2022.03.0
- distributed==2022.03.0
- dask>=2022.03.0
- distributed>=2022.03.0
- python-confluent-kafka >=1.7.0,<1.8.0a0
- cudf_kafka {{ version }}

Expand Down
8 changes: 4 additions & 4 deletions conda/recipes/dask-cudf/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,14 +27,14 @@ requirements:
host:
- python
- cudf {{ version }}
- dask==2022.03.0
- distributed==2022.03.0
- dask>=2022.03.0
- distributed>=2022.03.0
- cudatoolkit {{ cuda_version }}
run:
- python
- cudf {{ version }}
- dask==2022.03.0
- distributed==2022.03.0
- dask>=2022.03.0
- distributed>=2022.03.0
- {{ pin_compatible('cudatoolkit', max_pin='x', min_pin='x') }}

test: # [linux64]
Expand Down
2 changes: 1 addition & 1 deletion conda/recipes/libcudf/conda_build_config.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
cmake_version:
- ">=3.20.1,<3.23"
- ">=3.20.1,!=3.23.0"

gtest_version:
- "=1.10.0"
Expand Down
1 change: 1 addition & 0 deletions conda/recipes/libcudf/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@ outputs:
- test -f $PREFIX/include/cudf/detail/transpose.hpp
- test -f $PREFIX/include/cudf/detail/unary.hpp
- test -f $PREFIX/include/cudf/detail/utilities/alignment.hpp
- test -f $PREFIX/include/cudf/detail/utilities/column.hpp
- test -f $PREFIX/include/cudf/detail/utilities/integer_utils.hpp
- test -f $PREFIX/include/cudf/detail/utilities/int_fastdiv.h
- test -f $PREFIX/include/cudf/detail/utilities/vector_factories.hpp
Expand Down
2 changes: 1 addition & 1 deletion cpp/benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ ConfigureBench(
REDUCTION_BENCH reduction/anyall.cpp reduction/dictionary.cpp reduction/minmax.cpp
reduction/reduce.cpp reduction/scan.cpp
)
ConfigureNVBench(REDUCTION_NVBENCH reduction/segment_reduce.cu)
ConfigureNVBench(REDUCTION_NVBENCH reduction/segment_reduce.cu reduction/rank.cpp)

# ##################################################################################################
# * reduction benchmark ---------------------------------------------------------------------------
Expand Down
64 changes: 64 additions & 0 deletions cpp/benchmarks/reduction/rank.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
/*
* Copyright (c) 2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <benchmarks/common/generate_input.hpp>
#include <benchmarks/fixture/rmm_pool_raii.hpp>

#include <cudf/detail/scan.hpp>
#include <cudf/filling.hpp>
#include <cudf/lists/list_view.cuh>

#include <nvbench/nvbench.cuh>

template <typename type>
static void nvbench_reduction_scan(nvbench::state& state, nvbench::type_list<type>)
{
cudf::rmm_pool_raii pool_raii;

auto const dtype = cudf::type_to_id<type>();

double const null_frequency = state.get_float64("null_frequency");
size_t const size = state.get_int64("data_size");

data_profile table_data_profile;
table_data_profile.set_distribution_params(dtype, distribution_id::UNIFORM, 0, 5);
table_data_profile.set_null_frequency(null_frequency);

auto const table = create_random_table({dtype}, table_size_bytes{size / 2}, table_data_profile);

auto const new_tbl = cudf::repeat(table->view(), 2);
cudf::column_view input(new_tbl->view().column(0));

state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
rmm::cuda_stream_view stream_view{launch.get_stream()};
auto result = cudf::detail::inclusive_dense_rank_scan(
input, stream_view, rmm::mr::get_current_device_resource());
});
}

using data_type = nvbench::type_list<int32_t, cudf::list_view>;

NVBENCH_BENCH_TYPES(nvbench_reduction_scan, NVBENCH_TYPE_AXES(data_type))
.set_name("rank_scan")
.add_float64_axis("null_frequency", {0, 0.1, 0.5, 0.9})
.add_int64_axis("data_size",
{
10000, // 10k
100000, // 100k
1000000, // 1M
10000000, // 10M
100000000, // 100M
});
2 changes: 1 addition & 1 deletion cpp/benchmarks/sort/sort_structs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -80,5 +80,5 @@ void nvbench_sort_struct(nvbench::state& state)
NVBENCH_BENCH(nvbench_sort_struct)
.set_name("sort_struct")
.add_int64_power_of_two_axis("NumRows", {10, 18, 26})
.add_int64_axis("Depth", {1, 8})
.add_int64_axis("Depth", {0, 1, 8})
.add_int64_axis("Nulls", {0, 1});
Loading

0 comments on commit 1eee122

Please sign in to comment.