Skip to content

Commit

Permalink
Merge branch 'branch-22.02' of https://github.com/rapidsai/cudf into …
Browse files Browse the repository at this point in the history
…fea-parquet-dec128
  • Loading branch information
vuule committed Dec 7, 2021
2 parents df8a8e3 + a72f19e commit 8486647
Show file tree
Hide file tree
Showing 111 changed files with 1,819 additions and 1,610 deletions.
6 changes: 3 additions & 3 deletions ci/release/update-version.sh
Original file line number Diff line number Diff line change
Expand Up @@ -30,13 +30,13 @@ function sed_runner() {
}

# cpp update
sed_runner 's/'"CUDF VERSION .* LANGUAGES"'/'"CUDF VERSION ${NEXT_FULL_TAG} LANGUAGES"'/g' cpp/CMakeLists.txt
sed_runner 's/'"VERSION ${CURRENT_SHORT_TAG}.*"'/'"VERSION ${NEXT_FULL_TAG}"'/g' cpp/CMakeLists.txt

# cpp libcudf_kafka update
sed_runner 's/'"CUDA_KAFKA VERSION .* LANGUAGES"'/'"CUDA_KAFKA VERSION ${NEXT_FULL_TAG} LANGUAGES"'/g' cpp/libcudf_kafka/CMakeLists.txt
sed_runner 's/'"VERSION ${CURRENT_SHORT_TAG}.*"'/'"VERSION ${NEXT_FULL_TAG}"'/g' cpp/libcudf_kafka/CMakeLists.txt

# cpp cudf_jni update
sed_runner 's/'"CUDF_JNI VERSION .* LANGUAGES"'/'"CUDF_JNI VERSION ${NEXT_FULL_TAG} LANGUAGES"'/g' java/src/main/native/CMakeLists.txt
sed_runner 's/'"VERSION ${CURRENT_SHORT_TAG}.*"'/'"VERSION ${NEXT_FULL_TAG}"'/g' java/src/main/native/CMakeLists.txt

# rapids-cmake version
sed_runner 's/'"branch-.*\/RAPIDS.cmake"'/'"branch-${NEXT_SHORT_TAG}\/RAPIDS.cmake"'/g' fetch_rapids.cmake
Expand Down
3 changes: 2 additions & 1 deletion cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ rapids_cuda_init_architectures(CUDF)

project(
CUDF
VERSION 21.12.00
VERSION 22.02.00
LANGUAGES C CXX CUDA
)

Expand Down Expand Up @@ -185,6 +185,7 @@ add_library(
src/binaryop/compiled/LogicalOr.cu
src/binaryop/compiled/Mod.cu
src/binaryop/compiled/Mul.cu
src/binaryop/compiled/NullEquals.cu
src/binaryop/compiled/NullMax.cu
src/binaryop/compiled/NullMin.cu
src/binaryop/compiled/PMod.cu
Expand Down
66 changes: 36 additions & 30 deletions cpp/benchmarks/binaryop/compiled_binaryop_benchmark.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,14 +50,14 @@ void BM_compiled_binaryop(benchmark::State& state, cudf::binary_operator binop)
}

// TODO: add a boolean template parameter to select whether the input columns contain nulls.
#define BINARYOP_BENCHMARK_DEFINE(TypeLhs, TypeRhs, binop, TypeOut) \
#define BINARYOP_BENCHMARK_DEFINE(name, TypeLhs, TypeRhs, binop, TypeOut) \
BENCHMARK_TEMPLATE_DEFINE_F( \
COMPILED_BINARYOP, binop, TypeLhs, TypeRhs, TypeOut, cudf::binary_operator::binop) \
COMPILED_BINARYOP, name, TypeLhs, TypeRhs, TypeOut, cudf::binary_operator::binop) \
(::benchmark::State & st) \
{ \
BM_compiled_binaryop<TypeLhs, TypeRhs, TypeOut>(st, cudf::binary_operator::binop); \
} \
BENCHMARK_REGISTER_F(COMPILED_BINARYOP, binop) \
BENCHMARK_REGISTER_F(COMPILED_BINARYOP, name) \
->Unit(benchmark::kMicrosecond) \
->UseManualTime() \
->Arg(10000) /* 10k */ \
Expand All @@ -70,30 +70,36 @@ using namespace cudf;
using namespace numeric;

// clang-format off
BINARYOP_BENCHMARK_DEFINE(float, int64_t, ADD, int32_t);
BINARYOP_BENCHMARK_DEFINE(duration_s, duration_D, SUB, duration_ms);
BINARYOP_BENCHMARK_DEFINE(float, float, MUL, int64_t);
BINARYOP_BENCHMARK_DEFINE(int64_t, int64_t, DIV, int64_t);
BINARYOP_BENCHMARK_DEFINE(int64_t, int64_t, TRUE_DIV, int64_t);
BINARYOP_BENCHMARK_DEFINE(int64_t, int64_t, FLOOR_DIV, int64_t);
BINARYOP_BENCHMARK_DEFINE(double, double, MOD, double);
BINARYOP_BENCHMARK_DEFINE(int32_t, int64_t, PMOD, double);
BINARYOP_BENCHMARK_DEFINE(int32_t, uint8_t, PYMOD, int64_t);
BINARYOP_BENCHMARK_DEFINE(int64_t, int64_t, POW, double);
BINARYOP_BENCHMARK_DEFINE(float, double, LOG_BASE, double);
BINARYOP_BENCHMARK_DEFINE(float, double, ATAN2, double);
BINARYOP_BENCHMARK_DEFINE(int, int, SHIFT_LEFT, int);
BINARYOP_BENCHMARK_DEFINE(int16_t, int64_t, SHIFT_RIGHT, int);
BINARYOP_BENCHMARK_DEFINE(int64_t, int32_t, SHIFT_RIGHT_UNSIGNED, int64_t);
BINARYOP_BENCHMARK_DEFINE(int64_t, int32_t, BITWISE_AND, int16_t);
BINARYOP_BENCHMARK_DEFINE(int16_t, int32_t, BITWISE_OR, int64_t);
BINARYOP_BENCHMARK_DEFINE(int16_t, int64_t, BITWISE_XOR, int32_t);
BINARYOP_BENCHMARK_DEFINE(double, int8_t, LOGICAL_AND, bool);
BINARYOP_BENCHMARK_DEFINE(int16_t, int64_t, LOGICAL_OR, bool);
BINARYOP_BENCHMARK_DEFINE(duration_ms, duration_ns, EQUAL, bool);
BINARYOP_BENCHMARK_DEFINE(decimal32, decimal32, NOT_EQUAL, bool);
BINARYOP_BENCHMARK_DEFINE(timestamp_s, timestamp_s, LESS, bool);
BINARYOP_BENCHMARK_DEFINE(timestamp_ms, timestamp_s, GREATER, bool);
BINARYOP_BENCHMARK_DEFINE(duration_ms, duration_ns, NULL_EQUALS, bool);
BINARYOP_BENCHMARK_DEFINE(decimal32, decimal32, NULL_MAX, decimal32);
BINARYOP_BENCHMARK_DEFINE(timestamp_D, timestamp_s, NULL_MIN, timestamp_s);
BINARYOP_BENCHMARK_DEFINE(ADD_1, float, float, ADD, float);
BINARYOP_BENCHMARK_DEFINE(ADD_2, timestamp_s, duration_s, ADD, timestamp_s);
BINARYOP_BENCHMARK_DEFINE(SUB_1, duration_s, duration_D, SUB, duration_ms);
BINARYOP_BENCHMARK_DEFINE(SUB_2, int64_t, int64_t, SUB, int64_t);
BINARYOP_BENCHMARK_DEFINE(MUL_1, float, float, MUL, int64_t);
BINARYOP_BENCHMARK_DEFINE(MUL_2, duration_s, int64_t, MUL, duration_s);
BINARYOP_BENCHMARK_DEFINE(DIV_1, int64_t, int64_t, DIV, int64_t);
BINARYOP_BENCHMARK_DEFINE(DIV_2, duration_ms, int32_t, DIV, duration_ms);
BINARYOP_BENCHMARK_DEFINE(TRUE_DIV, int64_t, int64_t, TRUE_DIV, int64_t);
BINARYOP_BENCHMARK_DEFINE(FLOOR_DIV, int64_t, int64_t, FLOOR_DIV, int64_t);
BINARYOP_BENCHMARK_DEFINE(MOD_1, double, double, MOD, double);
BINARYOP_BENCHMARK_DEFINE(MOD_2, duration_ms, int64_t, MOD, duration_ms);
BINARYOP_BENCHMARK_DEFINE(PMOD, int32_t, int64_t, PMOD, double);
BINARYOP_BENCHMARK_DEFINE(PYMOD, int32_t, uint8_t, PYMOD, int64_t);
BINARYOP_BENCHMARK_DEFINE(POW, int64_t, int64_t, POW, double);
BINARYOP_BENCHMARK_DEFINE(LOG_BASE, float, double, LOG_BASE, double);
BINARYOP_BENCHMARK_DEFINE(ATAN2, float, double, ATAN2, double);
BINARYOP_BENCHMARK_DEFINE(SHIFT_LEFT, int, int, SHIFT_LEFT, int);
BINARYOP_BENCHMARK_DEFINE(SHIFT_RIGHT, int16_t, int64_t, SHIFT_RIGHT, int);
BINARYOP_BENCHMARK_DEFINE(USHIFT_RIGHT, int64_t, int32_t, SHIFT_RIGHT_UNSIGNED, int64_t);
BINARYOP_BENCHMARK_DEFINE(BITWISE_AND, int64_t, int32_t, BITWISE_AND, int16_t);
BINARYOP_BENCHMARK_DEFINE(BITWISE_OR, int16_t, int32_t, BITWISE_OR, int64_t);
BINARYOP_BENCHMARK_DEFINE(BITWISE_XOR, int16_t, int64_t, BITWISE_XOR, int32_t);
BINARYOP_BENCHMARK_DEFINE(LOGICAL_AND, double, int8_t, LOGICAL_AND, bool);
BINARYOP_BENCHMARK_DEFINE(LOGICAL_OR, int16_t, int64_t, LOGICAL_OR, bool);
BINARYOP_BENCHMARK_DEFINE(EQUAL_1, int32_t, int64_t, EQUAL, bool);
BINARYOP_BENCHMARK_DEFINE(EQUAL_2, duration_ms, duration_ns, EQUAL, bool);
BINARYOP_BENCHMARK_DEFINE(NOT_EQUAL, decimal32, decimal32, NOT_EQUAL, bool);
BINARYOP_BENCHMARK_DEFINE(LESS, timestamp_s, timestamp_s, LESS, bool);
BINARYOP_BENCHMARK_DEFINE(GREATER, timestamp_ms, timestamp_s, GREATER, bool);
BINARYOP_BENCHMARK_DEFINE(NULL_EQUALS, duration_ms, duration_ns, NULL_EQUALS, bool);
BINARYOP_BENCHMARK_DEFINE(NULL_MAX, decimal32, decimal32, NULL_MAX, decimal32);
BINARYOP_BENCHMARK_DEFINE(NULL_MIN, timestamp_D, timestamp_s, NULL_MIN, timestamp_s);
7 changes: 4 additions & 3 deletions cpp/benchmarks/groupby/group_sum_benchmark.cu
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,6 @@ void BM_basic_sum(benchmark::State& state)
{
using wrapper = cudf::test::fixed_width_column_wrapper<int64_t>;

// const cudf::size_type num_columns{(cudf::size_type)state.range(0)};
const cudf::size_type column_size{(cudf::size_type)state.range(0)};

auto data_it = cudf::detail::make_counting_transform_iterator(
Expand All @@ -53,7 +52,7 @@ void BM_basic_sum(benchmark::State& state)
wrapper keys(data_it, data_it + column_size);
wrapper vals(data_it, data_it + column_size);

cudf::groupby::groupby gb_obj(cudf::table_view({keys}));
cudf::groupby::groupby gb_obj(cudf::table_view({keys, keys, keys}));

std::vector<cudf::groupby::aggregation_request> requests;
requests.emplace_back(cudf::groupby::aggregation_request());
Expand All @@ -73,7 +72,9 @@ BENCHMARK_REGISTER_F(Groupby, Basic)
->UseManualTime()
->Unit(benchmark::kMillisecond)
->Arg(10000)
->Arg(10000000);
->Arg(1000000)
->Arg(10000000)
->Arg(100000000);

void BM_pre_sorted_sum(benchmark::State& state)
{
Expand Down
37 changes: 25 additions & 12 deletions cpp/benchmarks/hashing/hash_benchmark.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,27 +25,40 @@
class HashBenchmark : public cudf::benchmark {
};

static void BM_hash(benchmark::State& state, cudf::hash_id hid)
enum contains_nulls { no_nulls, nulls };

static void BM_hash(benchmark::State& state, cudf::hash_id hid, contains_nulls has_nulls)
{
cudf::size_type const n_rows{(cudf::size_type)state.range(0)};
auto const data = create_random_table({cudf::type_id::INT64}, 1, row_count{n_rows});
if (has_nulls == contains_nulls::no_nulls)
data->get_column(0).set_null_mask(rmm::device_buffer{}, 0);

for (auto _ : state) {
cuda_event_timer raii(state, true, rmm::cuda_stream_default);
cudf::hash(data->view(), hid);
}
}

#define HASH_BENCHMARK_DEFINE(name) \
BENCHMARK_DEFINE_F(HashBenchmark, name) \
(::benchmark::State & st) { BM_hash(st, cudf::hash_id::name); } \
BENCHMARK_REGISTER_F(HashBenchmark, name) \
->RangeMultiplier(4) \
->Ranges({{1 << 14, 1 << 24}}) \
->UseManualTime() \
#define concat(a, b, c) a##b##c

#define H_BENCHMARK_DEFINE(name, hid, n) \
BENCHMARK_DEFINE_F(HashBenchmark, name) \
(::benchmark::State & st) { BM_hash(st, cudf::hash_id::hid, contains_nulls::n); } \
BENCHMARK_REGISTER_F(HashBenchmark, name) \
->RangeMultiplier(4) \
->Ranges({{1 << 14, 1 << 24}}) \
->UseManualTime() \
->Unit(benchmark::kMillisecond);

HASH_BENCHMARK_DEFINE(HASH_MURMUR3)
HASH_BENCHMARK_DEFINE(HASH_MD5)
HASH_BENCHMARK_DEFINE(HASH_SERIAL_MURMUR3)
HASH_BENCHMARK_DEFINE(HASH_SPARK_MURMUR3)
#define HASH_BENCHMARK_DEFINE(hid, n) H_BENCHMARK_DEFINE(concat(hid, _, n), hid, n)

HASH_BENCHMARK_DEFINE(HASH_MURMUR3, nulls)
HASH_BENCHMARK_DEFINE(HASH_MD5, nulls)
HASH_BENCHMARK_DEFINE(HASH_SERIAL_MURMUR3, nulls)
HASH_BENCHMARK_DEFINE(HASH_SPARK_MURMUR3, nulls)

HASH_BENCHMARK_DEFINE(HASH_MURMUR3, no_nulls)
HASH_BENCHMARK_DEFINE(HASH_MD5, no_nulls)
HASH_BENCHMARK_DEFINE(HASH_SERIAL_MURMUR3, no_nulls)
HASH_BENCHMARK_DEFINE(HASH_SPARK_MURMUR3, no_nulls)
2 changes: 1 addition & 1 deletion cpp/cmake/thirdparty/get_cucollections.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ function(find_and_configure_cucollections)
cuco 0.0
GLOBAL_TARGETS cuco::cuco
CPM_ARGS GITHUB_REPOSITORY NVIDIA/cuCollections
GIT_TAG f0eecb203590f1f4ac4a9f1700229f4434ac64dc
GIT_TAG 6433e8ad7571f14cc5384051b049029c60dd1ce0
OPTIONS "BUILD_TESTS OFF" "BUILD_BENCHMARKS OFF" "BUILD_EXAMPLES OFF"
)

Expand Down
11 changes: 4 additions & 7 deletions cpp/docs/DOCUMENTATION.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ The following is the license header comment that should appear at the beginning

```c++
/*
* Copyright (c) 2020, NVIDIA CORPORATION.
* Copyright (c) 2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -30,7 +30,7 @@ The comment should start with `/*` and not `/**` so it is not processed by doxyg
Also, here are the rules for the copyright year.

- A new file should have the year in which it was created
- A modified file should span the year it was created and the year it was modified (e.g. `2019-2020`)
- A modified file should span the year it was created and the year it was modified (e.g. `2019-2021`)

Changing the copyright year may not be necessary if no content has changed (e.g. reformatting only).

Expand All @@ -48,7 +48,7 @@ Here are some of the custom options in the Doxyfile for libcudf.
| Option | Setting | Description |
| ------ | ------- | ----------- |
| PROJECT_NAME | libcudf | Title used on the main page |
| PROJECT_NUMBER | 0.14 | Version number |
| PROJECT_NUMBER | 22.02.00 | Version number |
| EXTENSION_MAPPING | cu=C++ cuh=C++ | Process `cu` and `cuh` as C++ |
| INPUT | main_page.md regex.md unicode.md ../include | Embedded markdown files and source code directories to process |
| FILE_PATTERNS | *.cpp *.hpp *.h *.c *.cu *.cuh | File extensions to process |
Expand Down Expand Up @@ -459,17 +459,14 @@ We recommend installing Doxygen using conda (`conda install doxygen`) or a Linux
Alternatively you can [build and install doxygen from source](http://www.doxygen.nl/manual/install.html).
To build the libcudf HTML documentation simply run the `doxygen` command from the `cpp/doxygen` directory containing the `Doxyfile`.
The libcudf documentation can also be built using `make docs_cudf` from the cmake build directory (e.g. `cpp/build`).
Doxygen reads and processes all appropriate source files under the `cpp/include/` directory.
The output is generated in the `cpp/doxygen/html/` directory.
You can load the local `index.html` file generated there into any web browser to view the result.
To view docs built on a remote server, you can run a simple HTTP server using Python: `cd html && python -m http.server`.
Then open `http://<IP address>:8000` in your local web browser, inserting the IP address of the machine on which you ran the HTTP server.
By default, doxygen uses the `graphviz dot` tool to build diagrams of the class, namespace, and module relationships.
If the `dot` tool cannot be found then doxygen generates output without diagrams.
The doxygen installation page does not include instructions for downloading and installing `graphviz dot`.
The doxygen output is intended for building documentation only for the public APIs and classes.
For example, the output should not include documentation for `detail` or `/src` files, and these directories are excluded in the `Doxyfile` configuration.
When published by the build/CI system, the doxygen output will appear on our external [RAPIDS web site](https://docs.rapids.ai/api/libcudf/stable/index.html).
Loading

0 comments on commit 8486647

Please sign in to comment.