Merge branch 'branch-22.04' of github.com:rapidsai/cudf into enh-cpp17_traits1
karthikeyann committed Feb 18, 2022
2 parents 768be5a + 858ab83 commit cbb1b8b
Showing 26 changed files with 583 additions and 458 deletions.
3 changes: 1 addition & 2 deletions build.sh
@@ -18,7 +18,7 @@ ARGS=$*
REPODIR=$(cd $(dirname $0); pwd)

VALIDARGS="clean libcudf cudf dask_cudf benchmarks tests libcudf_kafka cudf_kafka custreamz -v -g -n -l --allgpuarch --disable_nvtx --show_depr_warn --ptds -h --build_metrics --incl_cache_stats"
HELP="$0 [clean] [libcudf] [cudf] [dask_cudf] [benchmarks] [tests] [libcudf_kafka] [cudf_kafka] [custreamz] [-v] [-g] [-n] [-h] [-l] [--cmake-args=\\\"<args>\\\"]
HELP="$0 [clean] [libcudf] [cudf] [dask_cudf] [benchmarks] [tests] [libcudf_kafka] [cudf_kafka] [custreamz] [-v] [-g] [-n] [-h] [--cmake-args=\\\"<args>\\\"]
clean - remove all existing build artifacts and configuration (start
over)
libcudf - build the cudf C++ code only
@@ -32,7 +32,6 @@ HELP="$0 [clean] [libcudf] [cudf] [dask_cudf] [benchmarks] [tests] [libcudf_kafk
-v - verbose build mode
-g - build for debug
-n - no install step
-l - build legacy tests
--allgpuarch - build for all supported GPU architectures
--disable_nvtx - disable inserting NVTX profiling ranges
--show_depr_warn - show cmake deprecation warnings
8 changes: 2 additions & 6 deletions ci/benchmark/build.sh
@@ -1,5 +1,5 @@
#!/bin/bash
# Copyright (c) 2020, NVIDIA CORPORATION.
# Copyright (c) 2020-2022, NVIDIA CORPORATION.
#########################################
# cuDF GPU build and test script for CI #
#########################################
@@ -98,11 +98,7 @@ conda list --show-channel-urls
################################################################################

logger "Build libcudf..."
if [[ "${BUILD_MODE}" == "pull-request" ]]; then
"$WORKSPACE/build.sh" clean libcudf cudf dask_cudf benchmarks tests --ptds
else
"$WORKSPACE/build.sh" clean libcudf cudf dask_cudf benchmarks tests -l --ptds
fi
"$WORKSPACE/build.sh" clean libcudf cudf dask_cudf benchmarks tests --ptds

################################################################################
# BENCHMARK - Run and parse libcudf and cuDF benchmarks
19 changes: 6 additions & 13 deletions ci/cpu/upload.sh
@@ -23,56 +23,49 @@ if [ -z "$MY_UPLOAD_KEY" ]; then
return 0
fi

################################################################################
# SETUP - Get conda file output locations
################################################################################

gpuci_logger "Get conda file output locations"

export LIBCUDF_FILE=`conda build --no-build-id --croot "$WORKSPACE/.conda-bld" conda/recipes/libcudf --output`
export LIBCUDF_KAFKA_FILE=`conda build --no-build-id --croot "$WORKSPACE/.conda-bld" conda/recipes/libcudf_kafka --output`
export CUDF_FILE=`conda build --croot ${CONDA_BLD_DIR} conda/recipes/cudf --python=$PYTHON --output`
export DASK_CUDF_FILE=`conda build --croot ${CONDA_BLD_DIR} conda/recipes/dask-cudf --python=$PYTHON --output`
export CUDF_KAFKA_FILE=`conda build --croot ${CONDA_BLD_DIR} conda/recipes/cudf_kafka --python=$PYTHON --output`
export CUSTREAMZ_FILE=`conda build --croot ${CONDA_BLD_DIR} conda/recipes/custreamz --python=$PYTHON --output`

################################################################################
# UPLOAD - Conda packages
################################################################################

gpuci_logger "Starting conda uploads"
if [[ "$BUILD_LIBCUDF" == "1" && "$UPLOAD_LIBCUDF" == "1" ]]; then
export LIBCUDF_FILE=$(conda build --no-build-id --croot "${CONDA_BLD_DIR}" conda/recipes/libcudf --output)
test -e ${LIBCUDF_FILE}
echo "Upload libcudf"
echo ${LIBCUDF_FILE}
gpuci_retry anaconda -t ${MY_UPLOAD_KEY} upload -u ${CONDA_USERNAME:-rapidsai} ${LABEL_OPTION} --skip-existing ${LIBCUDF_FILE} --no-progress
fi

if [[ "$BUILD_CUDF" == "1" && "$UPLOAD_CUDF" == "1" ]]; then
export CUDF_FILE=$(conda build --croot "${CONDA_BLD_DIR}" conda/recipes/cudf --python=$PYTHON --output)
test -e ${CUDF_FILE}
echo "Upload cudf"
echo ${CUDF_FILE}
gpuci_retry anaconda -t ${MY_UPLOAD_KEY} upload -u ${CONDA_USERNAME:-rapidsai} ${LABEL_OPTION} --skip-existing ${CUDF_FILE} --no-progress

export DASK_CUDF_FILE=$(conda build --croot "${CONDA_BLD_DIR}" conda/recipes/dask-cudf --python=$PYTHON --output)
test -e ${DASK_CUDF_FILE}
echo "Upload dask-cudf"
echo ${DASK_CUDF_FILE}
gpuci_retry anaconda -t ${MY_UPLOAD_KEY} upload -u ${CONDA_USERNAME:-rapidsai} ${LABEL_OPTION} --skip-existing ${DASK_CUDF_FILE} --no-progress

export CUSTREAMZ_FILE=$(conda build --croot "${CONDA_BLD_DIR}" conda/recipes/custreamz --python=$PYTHON --output)
test -e ${CUSTREAMZ_FILE}
echo "Upload custreamz"
echo ${CUSTREAMZ_FILE}
gpuci_retry anaconda -t ${MY_UPLOAD_KEY} upload -u ${CONDA_USERNAME:-rapidsai} ${LABEL_OPTION} --skip-existing ${CUSTREAMZ_FILE} --no-progress
fi

if [[ "$BUILD_LIBCUDF" == "1" && "$UPLOAD_LIBCUDF_KAFKA" == "1" ]]; then
export LIBCUDF_KAFKA_FILE=$(conda build --no-build-id --croot "${CONDA_BLD_DIR}" conda/recipes/libcudf_kafka --output)
test -e ${LIBCUDF_KAFKA_FILE}
echo "Upload libcudf_kafka"
echo ${LIBCUDF_KAFKA_FILE}
gpuci_retry anaconda -t ${MY_UPLOAD_KEY} upload -u ${CONDA_USERNAME:-rapidsai} ${LABEL_OPTION} --skip-existing ${LIBCUDF_KAFKA_FILE} --no-progress
fi

if [[ "$BUILD_CUDF" == "1" && "$UPLOAD_CUDF_KAFKA" == "1" ]]; then
export CUDF_KAFKA_FILE=$(conda build --croot "${CONDA_BLD_DIR}" conda/recipes/cudf_kafka --python=$PYTHON --output)
test -e ${CUDF_KAFKA_FILE}
echo "Upload cudf_kafka"
echo ${CUDF_KAFKA_FILE}
12 changes: 2 additions & 10 deletions ci/gpu/build.sh
@@ -128,11 +128,7 @@ if [[ -z "$PROJECT_FLASH" || "$PROJECT_FLASH" == "0" ]]; then
################################################################################

gpuci_logger "Build from source"
if [[ "${BUILD_MODE}" == "pull-request" ]]; then
"$WORKSPACE/build.sh" clean libcudf cudf dask_cudf libcudf_kafka cudf_kafka benchmarks tests --ptds
else
"$WORKSPACE/build.sh" clean libcudf cudf dask_cudf libcudf_kafka cudf_kafka benchmarks tests -l --ptds
fi
"$WORKSPACE/build.sh" clean libcudf cudf dask_cudf libcudf_kafka cudf_kafka benchmarks tests --ptds

################################################################################
# TEST - Run GoogleTest
@@ -226,11 +222,7 @@ else
install_dask

gpuci_logger "Build python libs from source"
if [[ "${BUILD_MODE}" == "pull-request" ]]; then
"$WORKSPACE/build.sh" cudf dask_cudf cudf_kafka --ptds
else
"$WORKSPACE/build.sh" cudf dask_cudf cudf_kafka -l --ptds
fi
"$WORKSPACE/build.sh" cudf dask_cudf cudf_kafka --ptds

fi

2 changes: 1 addition & 1 deletion cpp/benchmarks/CMakeLists.txt
@@ -260,7 +260,7 @@ ConfigureBench(
string/convert_durations.cpp
string/convert_fixed_point.cpp
string/convert_numerics.cpp
string/copy.cpp
string/copy.cu
string/extract.cpp
string/factory.cu
string/filter.cpp
4 changes: 3 additions & 1 deletion cpp/benchmarks/io/text/multibyte_split.cpp
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
* Copyright (c) 2021-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -135,12 +135,14 @@ static void BM_multibyte_split(benchmark::State& state)
default: CUDF_FAIL();
}

auto mem_stats_logger = cudf::memory_stats_logger();
for (auto _ : state) {
cuda_event_timer raii(state, true);
auto output = cudf::io::text::multibyte_split(*source, delim);
}

state.SetBytesProcessed(state.iterations() * device_input.size());
state.counters["peak_memory_usage"] = mem_stats_logger.peak_memory_usage();
}

class MultibyteSplitBenchmark : public cudf::benchmark {
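The hunk above adds peak-memory tracking around the timed loop. As a minimal sketch of how the same pattern generalizes to another Google Benchmark case (the fixture header path and the memory_stats_logger API are assumed from this diff, not verified against the full source; BM_some_operation is a hypothetical name):

#include <benchmark/benchmark.h>
#include <benchmarks/fixture/benchmark_fixture.hpp>  // assumed location of cudf::memory_stats_logger

static void BM_some_operation(benchmark::State& state)
{
  // Construct the logger before the timed loop so it observes every allocation.
  auto mem_stats_logger = cudf::memory_stats_logger();
  for (auto _ : state) {
    // ... run the operation under test ...
  }
  // Report the allocation high-water mark alongside the timing counters.
  state.counters["peak_memory_usage"] = mem_stats_logger.peak_memory_usage();
}
BENCHMARK(BM_some_operation);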
23 changes: 12 additions & 11 deletions cpp/benchmarks/string/copy.cpp → cpp/benchmarks/string/copy.cu
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
* Copyright (c) 2021-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,7 +14,8 @@
* limitations under the License.
*/

#include <benchmark/benchmark.h>
#include "string_bench_args.hpp"

#include <benchmarks/common/generate_input.hpp>
#include <benchmarks/fixture/benchmark_fixture.hpp>
#include <benchmarks/synchronization/synchronization.hpp>
@@ -23,10 +24,7 @@
#include <cudf/strings/strings_column_view.hpp>
#include <cudf_test/column_wrapper.hpp>

#include <algorithm>
#include <random>

#include "string_bench_args.hpp"
#include <thrust/shuffle.h>

class StringCopy : public cudf::benchmark {
};
@@ -47,11 +45,14 @@ static void BM_copy(benchmark::State& state, copy_type ct)
create_random_table({cudf::type_id::STRING}, 1, row_count{n_rows}, table_profile);

// scatter indices
std::vector<cudf::size_type> host_map_data(n_rows);
std::iota(host_map_data.begin(), host_map_data.end(), 0);
std::random_shuffle(host_map_data.begin(), host_map_data.end());
cudf::test::fixed_width_column_wrapper<cudf::size_type> index_map(host_map_data.begin(),
host_map_data.end());
auto index_map_col = make_numeric_column(
cudf::data_type{cudf::type_id::INT32}, n_rows, cudf::mask_state::UNALLOCATED);
auto index_map = index_map_col->mutable_view();
thrust::shuffle_copy(thrust::device,
thrust::counting_iterator<cudf::size_type>(0),
thrust::counting_iterator<cudf::size_type>(n_rows),
index_map.begin<cudf::size_type>(),
thrust::default_random_engine());

for (auto _ : state) {
cuda_event_timer raii(state, true, rmm::cuda_stream_default);
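The hunk above swaps the host-side std::iota/std::random_shuffle sequence (std::random_shuffle is removed in C++17) for a device-side thrust::shuffle_copy writing straight into the column's device memory, which is why the file moves from .cpp to .cu. A self-contained sketch of that shuffle primitive on its own, assuming a Thrust version that provides shuffle_copy (1.9.10 or newer):

#include <thrust/device_vector.h>
#include <thrust/execution_policy.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/random.h>
#include <thrust/shuffle.h>

int main()
{
  int const n_rows = 1000;
  thrust::device_vector<int> index_map(n_rows);
  // Write a random permutation of [0, n_rows) directly into device memory,
  // avoiding the host-side iota + shuffle + copy round trip.
  thrust::shuffle_copy(thrust::device,
                       thrust::counting_iterator<int>(0),
                       thrust::counting_iterator<int>(n_rows),
                       index_map.begin(),
                       thrust::default_random_engine{});
  return 0;
}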
44 changes: 43 additions & 1 deletion cpp/include/cudf/column/column_view.hpp
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2021, NVIDIA CORPORATION.
* Copyright (c) 2019-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -16,8 +16,13 @@
#pragma once

#include <cudf/types.hpp>
#include <cudf/utilities/error.hpp>
#include <cudf/utilities/span.hpp>
#include <cudf/utilities/traits.hpp>
#include <cudf/utilities/type_dispatcher.hpp>

#include <limits>
#include <type_traits>
#include <vector>

/**
@@ -375,6 +380,43 @@ class column_view : public detail::column_view_base {
*/
auto child_end() const noexcept { return _children.cend(); }

/**
* @brief Construct a column view from a device_span<T>.
*
* Only numeric and chrono types are supported.
*
* @tparam T The device span type. Must be const and match the column view's type.
* @param data A typed device span containing the column view's data.
*/
template <typename T, CUDF_ENABLE_IF(cudf::is_numeric<T>() or cudf::is_chrono<T>())>
column_view(device_span<T const> data)
: column_view(
cudf::data_type{cudf::type_to_id<T>()}, data.size(), data.data(), nullptr, 0, 0, {})
{
CUDF_EXPECTS(data.size() < std::numeric_limits<cudf::size_type>::max(),
"Data exceeds the maximum size of a column view.");
}

/**
* @brief Converts a column view into a device span.
*
* Only numeric and chrono data types are supported. The column view must not
* be nullable.
*
* @tparam T The device span type. Must be const and match the column view's type.
* @throws cudf::logic_error if the column view type does not match the span type.
* @throws cudf::logic_error if the column view is nullable.
* @return A typed device span of the column view's data.
*/
template <typename T, CUDF_ENABLE_IF(cudf::is_numeric<T>() or cudf::is_chrono<T>())>
[[nodiscard]] operator device_span<T const>() const
{
CUDF_EXPECTS(type() == cudf::data_type{cudf::type_to_id<T>()},
"Device span type must match column view type.");
CUDF_EXPECTS(!nullable(), "A nullable column view cannot be converted to a device span.");
return device_span<T const>(data<T>(), size());
}

private:
friend column_view bit_cast(column_view const& input, data_type type);

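A minimal usage sketch of the two conversions added above (the rmm::device_uvector setup and the round_trip function are illustrative, not part of the commit):

#include <cudf/column/column_view.hpp>
#include <cudf/utilities/span.hpp>
#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_uvector.hpp>

#include <cstdint>

void round_trip(rmm::cuda_stream_view stream)
{
  rmm::device_uvector<int32_t> data(100, stream);
  // Wrap existing device memory as a typed span, then view it as a column without copying.
  cudf::device_span<int32_t const> span{data.data(), data.size()};
  cudf::column_view col{span};
  // Convert back; throws cudf::logic_error on a type mismatch or a nullable column.
  auto const as_span = static_cast<cudf::device_span<int32_t const>>(col);
  (void)as_span;
}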
3 changes: 2 additions & 1 deletion cpp/include/cudf/detail/utilities/hash_functions.cuh
@@ -21,6 +21,7 @@
#include <cudf/column/column_device_view.cuh>
#include <cudf/detail/utilities/assert.cuh>
#include <cudf/fixed_point/fixed_point.hpp>
#include <cudf/hashing.hpp>
#include <cudf/strings/string_view.cuh>
#include <cudf/types.hpp>

@@ -130,7 +131,7 @@ struct MurmurHash3_32 {
*
* @returns A hash value that intelligently combines the lhs and rhs hash values
*/
[[nodiscard]] __device__ inline result_type hash_combine(result_type lhs, result_type rhs)
constexpr result_type hash_combine(result_type lhs, result_type rhs) const
{
result_type combined{lhs};

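The hunk above only changes hash_combine from a __device__ member to a constexpr const member; the body is elided. For orientation, a standalone sketch of the common Boost-style combining step that a constexpr function of this shape can evaluate at compile time (the constant and shift amounts are an assumption, not copied from the cudf source):

#include <cstdint>

// Boost-style combine: mix rhs into lhs using the golden-ratio constant.
constexpr uint32_t hash_combine_sketch(uint32_t lhs, uint32_t rhs)
{
  uint32_t combined{lhs};
  combined ^= rhs + 0x9e3779b9u + (combined << 6) + (combined >> 2);
  return combined;
}

// Because the function is constexpr, it can be exercised in a compile-time check:
static_assert(hash_combine_sketch(0u, 0u) == 0x9e3779b9u,
              "combining two zero hashes yields the seed constant");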
19 changes: 17 additions & 2 deletions cpp/include/cudf/hashing.hpp
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2021, NVIDIA CORPORATION.
* Copyright (c) 2019-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -17,7 +17,6 @@

#include <cudf/table/table.hpp>
#include <cudf/table/table_view.hpp>
#include <cudf/utilities/span.hpp>

namespace cudf {
/**
@@ -26,6 +25,22 @@ namespace cudf {
* @file
*/

/**
* @brief Identifies the hash function to be used
*/
enum class hash_id {
HASH_IDENTITY = 0, ///< Identity hash function that simply returns the key to be hashed
HASH_MURMUR3, ///< Murmur3 hash function
HASH_MD5, ///< MD5 hash function
HASH_SERIAL_MURMUR3, ///< Serial Murmur3 hash function
HASH_SPARK_MURMUR3 ///< Spark Murmur3 hash function
};

/**
* @brief The default seed value for hash functions
*/
static constexpr uint32_t DEFAULT_HASH_SEED = 0;

/**
* @brief Computes the hash value of each row in the input set of columns.
*
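With hash_id and DEFAULT_HASH_SEED relocated here from cudf/types.hpp, callers that pick a hash function only need <cudf/hashing.hpp>. A hedged usage sketch (the exact cudf::hash signature and default arguments are assumed, since this diff does not show them):

#include <cudf/hashing.hpp>
#include <cudf/table/table_view.hpp>

#include <memory>

std::unique_ptr<cudf::column> murmur3_of(cudf::table_view const& input)
{
  // hash_id now comes from cudf/hashing.hpp rather than cudf/types.hpp.
  return cudf::hash(input, cudf::hash_id::HASH_MURMUR3);
}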
4 changes: 2 additions & 2 deletions cpp/include/cudf/partitioning.hpp
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020, NVIDIA CORPORATION.
* Copyright (c) 2020-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -16,7 +16,7 @@

#pragma once

#include <cudf/types.hpp>
#include <cudf/hashing.hpp>

#include <rmm/cuda_stream_view.hpp>

18 changes: 1 addition & 17 deletions cpp/include/cudf/types.hpp
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2018-2021, NVIDIA CORPORATION.
* Copyright (c) 2018-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -326,21 +326,5 @@ inline bool operator!=(data_type const& lhs, data_type const& rhs) { return !(lh
*/
std::size_t size_of(data_type t);

/**
* @brief Identifies the hash function to be used
*/
enum class hash_id {
HASH_IDENTITY = 0, ///< Identity hash function that simply returns the key to be hashed
HASH_MURMUR3, ///< Murmur3 hash function
HASH_MD5, ///< MD5 hash function
HASH_SERIAL_MURMUR3, ///< Serial Murmur3 hash function
HASH_SPARK_MURMUR3 ///< Spark Murmur3 hash function
};

/**
* @brief The default seed value for hash functions
*/
static constexpr uint32_t DEFAULT_HASH_SEED = 0;

/** @} */
} // namespace cudf
9 changes: 7 additions & 2 deletions cpp/tests/CMakeLists.txt
@@ -48,8 +48,13 @@ endfunction()
# ##################################################################################################
# * column tests ----------------------------------------------------------------------------------
ConfigureTest(
COLUMN_TEST column/bit_cast_test.cpp column/column_view_shallow_test.cpp column/column_test.cu
column/column_device_view_test.cu column/compound_test.cu
COLUMN_TEST
column/bit_cast_test.cpp
column/column_device_view_test.cu
column/column_test.cu
column/column_view_device_span_test.cpp
column/column_view_shallow_test.cpp
column/compound_test.cu
)

# ##################################################################################################