Skip to content

Commit

Permalink
Merge branch 'branch-0.18' into rwlee/sparkspecific
Browse files Browse the repository at this point in the history
  • Loading branch information
rwlee authored Jan 4, 2021
2 parents 4b6db38 + ca1a4d6 commit a4e95fe
Show file tree
Hide file tree
Showing 257 changed files with 6,185 additions and 4,052 deletions.
15 changes: 13 additions & 2 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,29 +2,38 @@

## New Features
- PR #6856 Add groupby idxmin, idxmax aggregation

- PR #6847 Add a cmake find module for cuFile in JNI code
- PR #6902 Implement `DataFrame.quantile` for `datetime` and `timedelta` data types
- PR #6814 Implement `cudf::reduce` for `decimal32` and `decimal64` (part 1)
- PR #6929 Add `Index.set_names` api
- PR #6907 Add `replace_null` API with `replace_policy` parameter, `fixed_width` column support
- PR #6885 Share `factorize` implementation with Index and cudf module

- PR #6775 Implement cudf.DateOffset for months

## Improvements

- PR #6938 Pass numeric scalars of the same dtype through numeric binops
- PR #6275 Update to official libcu++ on Github
- PR #6838 Fix `columns` & `index` handling in dataframe constructor
- PR #6750 Remove **kwargs from string/categorical methods
- PR #6909 Support reading byte array backed decimal columns from parquet files
- PR #6939 Use simplified `rmm::exec_policy`
- PR #6512 Refactor rolling.cu to reduce compile time
- PR #6982 Disable some pragma unroll statements in thrust `sort.h`

## Bug Fixes

- PR #6884 Correct the sampling range when sampling with replacement
- PR #6903 Add null count test for apply_boolean_mask
- PR #6922 Fix N/A detection for empty fields in CSV reader
- PR #6912 Fix rmm_mode=managed parameter for gtests
- PR #6943 Fix join with nulls not equal performance
- PR #6945 Fix groupby agg/apply behaviour when no key columns are provided
- PR #6942 Fix cudf::merge gtest for dictionary columns


# cuDF 0.17.0 (Date TBD)
# cuDF 0.17.0 (10 Dec 2020)

## New Features

Expand Down Expand Up @@ -63,6 +72,7 @@
- PR #6765 Cupy fallback for __array_function__ and __array_ufunc__ for cudf.Series
- PR #6817 Add support for scatter() on lists-of-struct columns
- PR #6805 Implement `cudf::detail::copy_if` for `decimal32` and `decimal64`
- PR #6483 Add `agg` function to aggregate dataframe using one or more operations
- PR #6726 Support selecting different hash functions in hash_partition
- PR #6619 Improve Dockerfile
- PR #6831 Added parquet chunked writing ability for list columns
Expand Down Expand Up @@ -153,6 +163,7 @@
- PR #6837 Avoid gather when copying strings view from start of strings column
- PR #6859 Move align_ptr_for_type() from cuda.cuh to alignment.hpp
- PR #6807 Refactor `std::array` usage in row group index writing in ORC
- PR #6914 Enable groupby `list` aggregation for strings
- PR #6908 Parquet option for strictly decimal reading

## Bug Fixes
Expand Down
6 changes: 3 additions & 3 deletions ci/gpu/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -205,15 +205,15 @@ fi

cd $WORKSPACE/python/cudf
gpuci_logger "Python py.test for cuDF"
py.test --cache-clear --basetemp=${WORKSPACE}/cudf-cuda-tmp --junitxml=${WORKSPACE}/junit-cudf.xml -v --cov-config=.coveragerc --cov=cudf --cov-report=xml:${WORKSPACE}/python/cudf/cudf-coverage.xml --cov-report term
py.test -n 6 --cache-clear --basetemp=${WORKSPACE}/cudf-cuda-tmp --junitxml=${WORKSPACE}/junit-cudf.xml -v --cov-config=.coveragerc --cov=cudf --cov-report=xml:${WORKSPACE}/python/cudf/cudf-coverage.xml --cov-report term

cd $WORKSPACE/python/dask_cudf
gpuci_logger "Python py.test for dask-cudf"
py.test --cache-clear --basetemp=${WORKSPACE}/dask-cudf-cuda-tmp --junitxml=${WORKSPACE}/junit-dask-cudf.xml -v --cov-config=.coveragerc --cov=dask_cudf --cov-report=xml:${WORKSPACE}/python/dask_cudf/dask-cudf-coverage.xml --cov-report term
py.test -n 6 --cache-clear --basetemp=${WORKSPACE}/dask-cudf-cuda-tmp --junitxml=${WORKSPACE}/junit-dask-cudf.xml -v --cov-config=.coveragerc --cov=dask_cudf --cov-report=xml:${WORKSPACE}/python/dask_cudf/dask-cudf-coverage.xml --cov-report term

cd $WORKSPACE/python/custreamz
gpuci_logger "Python py.test for cuStreamz"
py.test --cache-clear --basetemp=${WORKSPACE}/custreamz-cuda-tmp --junitxml=${WORKSPACE}/junit-custreamz.xml -v --cov-config=.coveragerc --cov=custreamz --cov-report=xml:${WORKSPACE}/python/custreamz/custreamz-coverage.xml --cov-report term
py.test -n 6 --cache-clear --basetemp=${WORKSPACE}/custreamz-cuda-tmp --junitxml=${WORKSPACE}/junit-custreamz.xml -v --cov-config=.coveragerc --cov=custreamz --cov-report=xml:${WORKSPACE}/python/custreamz/custreamz-coverage.xml --cov-report term

gpuci_logger "Test notebooks"
${WORKSPACE}/ci/gpu/test-notebooks.sh 2>&1 | tee nbtest.log
Expand Down
1 change: 1 addition & 0 deletions conda/environments/cudf_dev_cuda10.1.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ dependencies:
- fsspec>=0.6.0
- pytest
- pytest-benchmark
- pytest-xdist
- sphinx
- sphinx_rtd_theme
- sphinxcontrib-websupport
Expand Down
1 change: 1 addition & 0 deletions conda/environments/cudf_dev_cuda10.2.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ dependencies:
- fsspec>=0.6.0
- pytest
- pytest-benchmark
- pytest-xdist
- sphinx
- sphinx_rtd_theme
- sphinxcontrib-websupport
Expand Down
1 change: 1 addition & 0 deletions conda/environments/cudf_dev_cuda11.0.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ dependencies:
- fsspec>=0.6.0
- pytest
- pytest-benchmark
- pytest-xdist
- sphinx
- sphinx_rtd_theme
- sphinxcontrib-websupport
Expand Down
2 changes: 1 addition & 1 deletion conda/recipes/libcudf_kafka/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ requirements:
- cmake >=3.17.0
host:
- libcudf {{ version }}
- librdkafka 1.5
- librdkafka >=1.5.0,<1.5.3
run:
- {{ pin_compatible('librdkafka', max_pin='x.x') }} #TODO: librdkafka should be automatically included here by run_exports but is not

Expand Down
3 changes: 2 additions & 1 deletion cpp/benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,8 @@ ConfigureBench(SEARCH_BENCH "${SEARCH_BENCH_SRC}")
# - sort benchmark --------------------------------------------------------------------------------

set(SORT_BENCH_SRC
"${CMAKE_CURRENT_SOURCE_DIR}/sort/sort_benchmark.cu")
"${CMAKE_CURRENT_SOURCE_DIR}/sort/sort_benchmark.cu"
"${CMAKE_CURRENT_SOURCE_DIR}/sort/sort_strings_benchmark.cu")

ConfigureBench(SORT_BENCH "${SORT_BENCH_SRC}")

Expand Down
2 changes: 1 addition & 1 deletion cpp/benchmarks/common/generate_benchmark_input.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -307,7 +307,7 @@ std::unique_ptr<cudf::column> create_random_column(data_profile const& profile,
*/
struct string_column_data {
std::vector<char> chars;
std::vector<int32_t> offsets;
std::vector<cudf::size_type> offsets;
std::vector<cudf::bitmask_type> null_mask;
explicit string_column_data(cudf::size_type rows, cudf::size_type size)
{
Expand Down
73 changes: 60 additions & 13 deletions cpp/benchmarks/join/join_benchmark.cu
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#include <cudf/table/table.hpp>
#include <cudf/table/table_view.hpp>
#include <cudf/utilities/error.hpp>
#include <cudf_test/base_fixture.hpp>
#include <cudf_test/column_wrapper.hpp>

#include <fixture/benchmark_fixture.hpp>
Expand All @@ -36,7 +37,7 @@ template <typename key_type, typename payload_type>
class Join : public cudf::benchmark {
};

template <typename key_type, typename payload_type>
template <typename key_type, typename payload_type, bool Nullable>
static void BM_join(benchmark::State &state)
{
const cudf::size_type build_table_size{(cudf::size_type)state.range(0)};
Expand All @@ -46,11 +47,33 @@ static void BM_join(benchmark::State &state)
const bool is_build_table_key_unique = true;

// Generate build and probe tables

auto build_key_column =
cudf::make_numeric_column(cudf::data_type(cudf::type_to_id<key_type>()), build_table_size);
auto probe_key_column =
cudf::make_numeric_column(cudf::data_type(cudf::type_to_id<key_type>()), probe_table_size);
cudf::test::UniformRandomGenerator<cudf::size_type> rand_gen(0, build_table_size);
auto build_random_null_mask = [&rand_gen](int size) {
if (Nullable) {
// roughly 25% nulls
auto validity = thrust::make_transform_iterator(
thrust::make_counting_iterator(0),
[&rand_gen](auto i) { return (rand_gen.generate() & 3) == 0; });
return cudf::test::detail::make_null_mask(validity, validity + size);
} else {
return cudf::create_null_mask(size, cudf::mask_state::UNINITIALIZED);
}
};

std::unique_ptr<cudf::column> build_key_column = [&]() {
return Nullable ? cudf::make_numeric_column(cudf::data_type(cudf::type_to_id<key_type>()),
build_table_size,
build_random_null_mask(build_table_size))
: cudf::make_numeric_column(cudf::data_type(cudf::type_to_id<key_type>()),
build_table_size);
}();
std::unique_ptr<cudf::column> probe_key_column = [&]() {
return Nullable ? cudf::make_numeric_column(cudf::data_type(cudf::type_to_id<key_type>()),
probe_table_size,
build_random_null_mask(probe_table_size))
: cudf::make_numeric_column(cudf::data_type(cudf::type_to_id<key_type>()),
probe_table_size);
}();

generate_input_tables<key_type, cudf::size_type>(
build_key_column->mutable_view().data<key_type>(),
Expand Down Expand Up @@ -82,17 +105,23 @@ static void BM_join(benchmark::State &state)
for (auto _ : state) {
cuda_event_timer raii(state, true, 0);

auto result =
cudf::inner_join(probe_table, build_table, columns_to_join, columns_to_join, {{0, 0}});
auto result = cudf::inner_join(probe_table,
build_table,
columns_to_join,
columns_to_join,
{{0, 0}},
cudf::null_equality::UNEQUAL);
}
}

#define JOIN_BENCHMARK_DEFINE(name, key_type, payload_type) \
BENCHMARK_TEMPLATE_DEFINE_F(Join, name, key_type, payload_type) \
(::benchmark::State & st) { BM_join<key_type, payload_type>(st); }
#define JOIN_BENCHMARK_DEFINE(name, key_type, payload_type, nullable) \
BENCHMARK_TEMPLATE_DEFINE_F(Join, name, key_type, payload_type) \
(::benchmark::State & st) { BM_join<key_type, payload_type, nullable>(st); }

JOIN_BENCHMARK_DEFINE(join_32bit, int32_t, int32_t);
JOIN_BENCHMARK_DEFINE(join_64bit, int64_t, int64_t);
JOIN_BENCHMARK_DEFINE(join_32bit, int32_t, int32_t, false);
JOIN_BENCHMARK_DEFINE(join_64bit, int64_t, int64_t, false);
JOIN_BENCHMARK_DEFINE(join_32bit_nulls, int32_t, int32_t, true);
JOIN_BENCHMARK_DEFINE(join_64bit_nulls, int64_t, int64_t, true);

BENCHMARK_REGISTER_F(Join, join_32bit)
->Unit(benchmark::kMillisecond)
Expand All @@ -111,3 +140,21 @@ BENCHMARK_REGISTER_F(Join, join_64bit)
->Args({50'000'000, 50'000'000})
->Args({40'000'000, 120'000'000})
->UseManualTime();

BENCHMARK_REGISTER_F(Join, join_32bit_nulls)
->Unit(benchmark::kMillisecond)
->Args({100'000, 100'000})
->Args({100'000, 400'000})
->Args({100'000, 1'000'000})
->Args({10'000'000, 10'000'000})
->Args({10'000'000, 40'000'000})
->Args({10'000'000, 100'000'000})
->Args({100'000'000, 100'000'000})
->Args({80'000'000, 240'000'000})
->UseManualTime();

BENCHMARK_REGISTER_F(Join, join_64bit_nulls)
->Unit(benchmark::kMillisecond)
->Args({50'000'000, 50'000'000})
->Args({40'000'000, 120'000'000})
->UseManualTime();
49 changes: 49 additions & 0 deletions cpp/benchmarks/sort/sort_strings_benchmark.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
/*
* Copyright (c) 2020, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <benchmark/benchmark.h>
#include <benchmarks/common/generate_benchmark_input.hpp>
#include <benchmarks/fixture/benchmark_fixture.hpp>
#include <benchmarks/synchronization/synchronization.hpp>

#include <cudf/sorting.hpp>
#include <cudf/types.hpp>

// Benchmark fixture for string-column sort benchmarks; inherits the common
// cudf benchmark setup (memory resource / device configuration) and adds no state.
class SortStrings : public cudf::benchmark {
};

/**
 * @brief Benchmarks `cudf::sort` on a single random STRING column.
 *
 * The number of rows is taken from the benchmark's first range argument.
 * The input table is generated once, outside the timed loop; each iteration
 * times only the sort itself via `cuda_event_timer` (manual GPU timing).
 *
 * @param state google-benchmark state carrying the row-count argument and timing.
 */
static void BM_sort(benchmark::State& state)
{
  // static_cast instead of a C-style cast: intent-revealing and greppable.
  cudf::size_type const n_rows{static_cast<cudf::size_type>(state.range(0))};

  // One random STRING column of n_rows rows (presumably uniformly random
  // contents from the shared benchmark-input generator — see generate_benchmark_input.hpp).
  auto const table = create_random_table({cudf::type_id::STRING}, 1, row_count{n_rows});

  for (auto _ : state) {
    cuda_event_timer raii(state, true, 0);  // RAII timer: starts/stops GPU event timing per iteration
    cudf::sort(table->view());              // result intentionally discarded; only the sort is measured
  }
}

// Defines and registers a SortStrings benchmark named `name` that forwards to
// BM_sort. Row counts sweep 2^10..2^24 in multiples of 8; UseManualTime()
// reports the GPU time recorded by cuda_event_timer rather than wall time.
// (Comments must stay outside the macro: a // inside would break the \ continuations.)
#define SORT_BENCHMARK_DEFINE(name) \
BENCHMARK_DEFINE_F(SortStrings, name) \
(::benchmark::State & st) { BM_sort(st); } \
BENCHMARK_REGISTER_F(SortStrings, name) \
->RangeMultiplier(8) \
->Ranges({{1 << 10, 1 << 24}}) \
->UseManualTime() \
->Unit(benchmark::kMillisecond);

// Instantiate the single strings-sort benchmark.
SORT_BENCHMARK_DEFINE(stringssort)
6 changes: 3 additions & 3 deletions cpp/benchmarks/synchronization/synchronization.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
/**
* @file synchronization.hpp
* @brief This is the header file for `cuda_event_timer`.
**/
*/

/**
* @brief This class serves as a wrapper for using `cudaEvent_t` as the user
Expand Down Expand Up @@ -54,7 +54,7 @@
BENCHMARK(sample_cuda_benchmark)->UseManualTime();
**/
*/

#ifndef CUDF_BENCH_SYNCHRONIZATION_H
#define CUDF_BENCH_SYNCHRONIZATION_H
Expand All @@ -79,7 +79,7 @@ class cuda_event_timer {
* @param[in] flush_l2_cache_ whether or not to flush the L2 cache before
* every iteration.
* @param[in] stream_ The CUDA stream we are measuring time on.
**/
*/
cuda_event_timer(benchmark::State& state,
bool flush_l2_cache,
rmm::cuda_stream_view stream = rmm::cuda_stream_default);
Expand Down
2 changes: 1 addition & 1 deletion cpp/docs/TRANSITIONGUIDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -777,7 +777,7 @@ namespace experimental{
* @param mr Memory resource used to allocate device memory for the returned
* output column
* @return std::unique_ptr<column> Newly allocated output column
**/
*/
std::unique_ptr<column> new_function(cudf::column_view input,
cudf::mutable_column_view in_out,
cudf::table_view input_table,
Expand Down
4 changes: 0 additions & 4 deletions cpp/include/cudf/ast/detail/linearizer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,6 @@ namespace detail {
*
* This enum is device-specific. For instance, intermediate data references are generated by the
* linearization process but cannot be explicitly created by the user.
*
*/
enum class device_data_reference_type {
COLUMN, // A value in a table column
Expand All @@ -52,7 +51,6 @@ enum class device_data_reference_type {
*
* This is a POD class used to create references describing data type and locations for consumption
* by the `row_evaluator`.
*
*/
struct alignas(8) device_data_reference {
device_data_reference(device_data_reference_type reference_type,
Expand Down Expand Up @@ -85,7 +83,6 @@ class linearizer;
*
* This class is a part of a "visitor" pattern with the `linearizer` class.
* Nodes inheriting from this class can accept visitors.
*
*/
class node {
friend class detail::linearizer;
Expand All @@ -104,7 +101,6 @@ class node {
* the nodes and constructing vectors of information that are later used by the device for
* evaluating the abstract syntax tree as a "linear" list of operators whose input dependencies are
* resolved into intermediate data storage in shared memory.
*
*/
class linearizer {
friend class literal;
Expand Down
3 changes: 0 additions & 3 deletions cpp/include/cudf/ast/detail/operators.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -931,7 +931,6 @@ struct dispatch_unary_operator_types {

/**
* @brief Functor performing a type dispatch for a unary operator.
*
*/
struct type_dispatch_unary_op {
template <ast_operator op, typename F, typename... Ts>
Expand Down Expand Up @@ -968,7 +967,6 @@ CUDA_HOST_DEVICE_CALLABLE constexpr void unary_operator_dispatcher(ast_operator

/**
* @brief Functor to determine the return type of an operator from its input types.
*
*/
struct return_type_functor {
/**
Expand Down Expand Up @@ -1057,7 +1055,6 @@ inline cudf::data_type ast_operator_return_type(ast_operator op,

/**
* @brief Functor to determine the arity (number of operands) of an operator.
*
*/
struct arity_functor {
template <ast_operator op>
Expand Down
1 change: 0 additions & 1 deletion cpp/include/cudf/ast/detail/transform.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,6 @@ struct binary_row_output : public row_output {
* This class is designed for n-ary transform evaluation. Currently this class assumes that there's
* only one relevant "row index" in its methods, which corresponds to a row in a single input table
* and the same row index in an output column.
*
*/
struct row_evaluator {
friend struct row_output;
Expand Down
Loading

0 comments on commit a4e95fe

Please sign in to comment.