Skip to content

Commit

Permalink
Merge branch 'scan_reduce_aggregations' into tdigest_code_move
Browse files Browse the repository at this point in the history
  • Loading branch information
nvdbaranec committed Mar 11, 2022
2 parents 7fdc9f5 + 6f940fd commit 13c776a
Show file tree
Hide file tree
Showing 241 changed files with 9,313 additions and 5,015 deletions.
4 changes: 4 additions & 0 deletions build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,10 @@ if hasArg clean; then
rmdir ${bd} || true
fi
done

# Cleaning up python artifacts.
# Match on the entry name (not an unanchored regex over the whole path) so that only
# __pycache__ directories and files ending in .pyc/.pyo/.so are removed. -prune keeps
# find from descending into directories that are about to be deleted, and the
# NUL-delimited hand-off to xargs keeps paths containing whitespace intact.
find "${REPODIR}/python/" \( -name "__pycache__" -o -name "*.pyc" -o -name "*.pyo" -o -name "*.so" \) -prune -print0 | xargs -0 rm -rf

fi


Expand Down
5 changes: 3 additions & 2 deletions ci/gpu/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -239,9 +239,10 @@ fi
# TEST - Run py.test, notebooks
################################################################################

cd "$WORKSPACE/python/cudf"
cd "$WORKSPACE/python/cudf/cudf"
# It is essential to cd into $WORKSPACE/python/cudf/cudf as `pytest-xdist` + `coverage` seem to work only at this directory level.
gpuci_logger "Python py.test for cuDF"
py.test -n 8 --cache-clear --basetemp="$WORKSPACE/cudf-cuda-tmp" --ignore="$WORKSPACE/python/cudf/cudf/benchmarks" --junitxml="$WORKSPACE/junit-cudf.xml" -v --cov-config=.coveragerc --cov=cudf --cov-report=xml:"$WORKSPACE/python/cudf/cudf-coverage.xml" --cov-report term --dist=loadscope cudf
py.test -n 8 --cache-clear --basetemp="$WORKSPACE/cudf-cuda-tmp" --ignore="$WORKSPACE/python/cudf/cudf/benchmarks" --junitxml="$WORKSPACE/junit-cudf.xml" -v --cov-config="$WORKSPACE/python/cudf/.coveragerc" --cov=cudf --cov-report=xml:"$WORKSPACE/python/cudf/cudf-coverage.xml" --cov-report term --dist=loadscope tests

cd "$WORKSPACE/python/dask_cudf"
gpuci_logger "Python py.test for dask-cudf"
Expand Down
5 changes: 4 additions & 1 deletion codecov.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,7 @@
coverage:
status:
project: off
patch: off
patch: on

github_checks:
annotations: true
4 changes: 4 additions & 0 deletions conda/recipes/cudf/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@ build:
- CC
- CXX
- CUDAHOSTCXX
# libcudf's run_exports pinning is looser than we would like
ignore_run_exports:
- libcudf

requirements:
build:
Expand All @@ -44,6 +47,7 @@ requirements:
- numba >=0.54
- numpy
- {{ pin_compatible('pyarrow', max_pin='x.x.x') }} *cuda
- libcudf {{ version }}
- fastavro >=0.22.0
- {{ pin_compatible('rmm', max_pin='x.x') }}
- fsspec>=0.6.0
Expand Down
2 changes: 2 additions & 0 deletions conda/recipes/libcudf/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,7 @@ test:
- test -f $PREFIX/include/cudf/io/orc_metadata.hpp
- test -f $PREFIX/include/cudf/io/orc.hpp
- test -f $PREFIX/include/cudf/io/parquet.hpp
- test -f $PREFIX/include/cudf/io/text/byte_range_info.hpp
- test -f $PREFIX/include/cudf/io/text/data_chunk_source_factories.hpp
- test -f $PREFIX/include/cudf/io/text/data_chunk_source.hpp
- test -f $PREFIX/include/cudf/io/text/detail/multistate.hpp
Expand Down Expand Up @@ -203,6 +204,7 @@ test:
- test -f $PREFIX/include/cudf/strings/detail/fill.hpp
- test -f $PREFIX/include/cudf/strings/detail/json.hpp
- test -f $PREFIX/include/cudf/strings/detail/replace.hpp
- test -f $PREFIX/include/cudf/strings/detail/utf8.hpp
- test -f $PREFIX/include/cudf/strings/detail/utilities.hpp
- test -f $PREFIX/include/cudf/strings/extract.hpp
- test -f $PREFIX/include/cudf/strings/findall.hpp
Expand Down
5 changes: 5 additions & 0 deletions conda/recipes/libcudf_kafka/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,18 @@ build:
- PARALLEL_LEVEL
- VERSION_SUFFIX
- PROJECT_FLASH
# libcudf's run_exports pinning is looser than we would like
ignore_run_exports:
- libcudf

requirements:
build:
- cmake >=3.20.1
host:
- libcudf {{version}}
- librdkafka >=1.7.0,<1.8.0a0
run:
- libcudf {{version}}

test:
commands:
Expand Down
9 changes: 9 additions & 0 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -310,6 +310,7 @@ add_library(
src/io/parquet/writer_impl.cu
src/io/statistics/orc_column_statistics.cu
src/io/statistics/parquet_column_statistics.cu
src/io/text/byte_range_info.cpp
src/io/text/multibyte_split.cu
src/io/utilities/column_buffer.cpp
src/io/utilities/config_utils.cpp
Expand Down Expand Up @@ -361,6 +362,7 @@ add_library(
src/quantiles/quantiles.cu
src/reductions/all.cu
src/reductions/any.cu
src/reductions/collect_ops.cu
src/reductions/max.cu
src/reductions/mean.cu
src/reductions/min.cu
Expand All @@ -372,6 +374,13 @@ add_library(
src/reductions/scan/scan.cpp
src/reductions/scan/scan_exclusive.cu
src/reductions/scan/scan_inclusive.cu
src/reductions/segmented_all.cu
src/reductions/segmented_any.cu
src/reductions/segmented_max.cu
src/reductions/segmented_min.cu
src/reductions/segmented_product.cu
src/reductions/segmented_reductions.cpp
src/reductions/segmented_sum.cu
src/reductions/std.cu
src/reductions/sum.cu
src/reductions/sum_of_squares.cu
Expand Down
11 changes: 6 additions & 5 deletions cpp/benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

find_package(Threads REQUIRED)

add_library(cudf_datagen STATIC common/generate_input.cpp)
add_library(cudf_datagen STATIC common/generate_input.cpp common/generate_nullmask.cu)
target_compile_features(cudf_datagen PUBLIC cxx_std_17 cuda_std_17)

target_compile_options(
Expand All @@ -24,7 +24,7 @@ target_compile_options(

target_link_libraries(
cudf_datagen PUBLIC GTest::gmock GTest::gtest GTest::gmock_main GTest::gtest_main
benchmark::benchmark nvbench::nvbench Threads::Threads cudf
benchmark::benchmark nvbench::nvbench Threads::Threads cudf cudftestutil
)

target_include_directories(
Expand Down Expand Up @@ -175,9 +175,10 @@ ConfigureBench(TYPE_DISPATCHER_BENCH type_dispatcher/type_dispatcher.cu)
# ##################################################################################################
# * reduction benchmark ---------------------------------------------------------------------------
ConfigureBench(
REDUCTION_BENCH reduction/anyall.cpp reduction/dictionary.cpp reduction/reduce.cpp
reduction/scan.cpp reduction/minmax.cpp
REDUCTION_BENCH reduction/anyall.cpp reduction/dictionary.cpp reduction/minmax.cpp
reduction/reduce.cpp reduction/scan.cpp
)
ConfigureNVBench(REDUCTION_NVBENCH reduction/segment_reduce.cu)

# ##################################################################################################
# * reduction benchmark ---------------------------------------------------------------------------
Expand Down Expand Up @@ -276,7 +277,7 @@ ConfigureBench(

# ##################################################################################################
# * json benchmark -------------------------------------------------------------------
ConfigureBench(JSON_BENCH string/json.cpp)
ConfigureBench(JSON_BENCH string/json.cu)

# ##################################################################################################
# * io benchmark ---------------------------------------------------------------------
Expand Down
73 changes: 24 additions & 49 deletions cpp/benchmarks/ast/transform.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020, NVIDIA CORPORATION.
* Copyright (c) 2020-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -14,72 +14,42 @@
* limitations under the License.
*/

#include <cudf/column/column_factories.hpp>
#include <cudf/table/table.hpp>
#include <cudf/table/table_view.hpp>
#include <benchmarks/common/generate_input.hpp>
#include <benchmarks/fixture/benchmark_fixture.hpp>
#include <benchmarks/synchronization/synchronization.hpp>

#include <cudf/transform.hpp>
#include <cudf/types.hpp>
#include <cudf/utilities/error.hpp>

#include <cudf_test/column_wrapper.hpp>

#include <benchmark/benchmark.h>
#include <fixture/benchmark_fixture.hpp>
#include <fixture/templated_benchmark_fixture.hpp>
#include <synchronization/synchronization.hpp>

#include <thrust/iterator/counting_iterator.h>

#include <algorithm>
#include <list>
#include <numeric>
#include <random>
#include <memory>
#include <vector>

// Shape of the operator-expression tree used by the benchmarks in this file.
// Only a single shape is defined here.
enum class TreeType {
IMBALANCED_LEFT // All operator expressions have a left child operator expression and a right
// child column reference
};

// Benchmark fixture for the AST transform benchmarks. The template parameters
// (key type, tree shape, column reuse, nullability) only distinguish one
// instantiation from another; the class adds no members on top of cudf::benchmark.
template <typename key_type, TreeType tree_type, bool reuse_columns, bool Nullable>
class AST : public cudf::benchmark {
};

template <typename key_type, TreeType tree_type, bool reuse_columns, bool Nullable>
static void BM_ast_transform(benchmark::State& state)
{
const cudf::size_type table_size{(cudf::size_type)state.range(0)};
const cudf::size_type tree_levels = (cudf::size_type)state.range(1);
auto const table_size{static_cast<cudf::size_type>(state.range(0))};
auto const tree_levels{static_cast<cudf::size_type>(state.range(1))};

// Create table data
auto n_cols = reuse_columns ? 1 : tree_levels + 1;
auto column_wrappers = std::vector<cudf::test::fixed_width_column_wrapper<key_type>>(n_cols);
auto columns = std::vector<cudf::column_view>(n_cols);

auto data_iterator = thrust::make_counting_iterator(0);

if constexpr (Nullable) {
auto validities = std::vector<bool>(table_size);
std::random_device rd;
std::mt19937 gen(rd());

std::generate(
validities.begin(), validities.end(), [&]() { return gen() > (0.5 * gen.max()); });
std::generate_n(column_wrappers.begin(), n_cols, [=]() {
return cudf::test::fixed_width_column_wrapper<key_type>(
data_iterator, data_iterator + table_size, validities.begin());
});
} else {
std::generate_n(column_wrappers.begin(), n_cols, [=]() {
return cudf::test::fixed_width_column_wrapper<key_type>(data_iterator,
data_iterator + table_size);
});
}
std::transform(
column_wrappers.begin(), column_wrappers.end(), columns.begin(), [](auto const& col) {
return static_cast<cudf::column_view>(col);
});

cudf::table_view table{columns};
auto const n_cols = reuse_columns ? 1 : tree_levels + 1;
auto const source_table =
create_sequence_table(cycle_dtypes({cudf::type_to_id<key_type>()}, n_cols),
row_count{table_size},
Nullable ? 0.5 : -1.0);
auto table = source_table->view();

// Create column references
auto column_refs = std::vector<cudf::ast::column_reference>();
Expand Down Expand Up @@ -138,10 +108,15 @@ static void CustomRanges(benchmark::internal::Benchmark* b)
}
}

#define AST_TRANSFORM_BENCHMARK_DEFINE(name, key_type, tree_type, reuse_columns, nullable) \
TEMPLATED_BENCHMARK_F(AST, BM_ast_transform, key_type, tree_type, reuse_columns, nullable) \
->Apply(CustomRanges) \
->Unit(benchmark::kMillisecond) \
#define AST_TRANSFORM_BENCHMARK_DEFINE(name, key_type, tree_type, reuse_columns, nullable) \
BENCHMARK_TEMPLATE_DEFINE_F(AST, name, key_type, tree_type, reuse_columns, nullable) \
(::benchmark::State & st) \
{ \
BM_ast_transform<key_type, tree_type, reuse_columns, nullable>(st); \
} \
BENCHMARK_REGISTER_F(AST, name) \
->Apply(CustomRanges) \
->Unit(benchmark::kMillisecond) \
->UseManualTime();

AST_TRANSFORM_BENCHMARK_DEFINE(
Expand Down
53 changes: 17 additions & 36 deletions cpp/benchmarks/binaryop/binaryop.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020, NVIDIA CORPORATION.
* Copyright (c) 2020-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -14,23 +14,15 @@
* limitations under the License.
*/

#include <benchmarks/common/generate_input.hpp>
#include <benchmarks/fixture/benchmark_fixture.hpp>
#include <benchmarks/synchronization/synchronization.hpp>

#include <cudf/binaryop.hpp>
#include <cudf/column/column_factories.hpp>
#include <cudf/table/table.hpp>
#include <cudf/table/table_view.hpp>
#include <cudf/types.hpp>
#include <cudf/utilities/error.hpp>

#include <cudf_test/column_wrapper.hpp>

#include <benchmark/benchmark.h>
#include <fixture/benchmark_fixture.hpp>
#include <synchronization/synchronization.hpp>

#include <thrust/iterator/counting_iterator.h>

#include <algorithm>
#include <numeric>
#include <vector>

// This set of benchmarks is designed to be a comparison for the AST benchmarks
Expand All @@ -47,40 +39,29 @@ class BINARYOP : public cudf::benchmark {
template <typename key_type, TreeType tree_type, bool reuse_columns>
static void BM_binaryop_transform(benchmark::State& state)
{
const cudf::size_type table_size{(cudf::size_type)state.range(0)};
const cudf::size_type tree_levels = (cudf::size_type)state.range(1);
auto const table_size{static_cast<cudf::size_type>(state.range(0))};
auto const tree_levels{static_cast<cudf::size_type>(state.range(1))};

// Create table data
auto n_cols = reuse_columns ? 1 : tree_levels + 1;
auto column_wrappers = std::vector<cudf::test::fixed_width_column_wrapper<key_type>>();
auto columns = std::vector<cudf::column_view>(n_cols);

auto data_iterator = thrust::make_counting_iterator(0);
std::generate_n(std::back_inserter(column_wrappers), n_cols, [=]() {
return cudf::test::fixed_width_column_wrapper<key_type>(data_iterator,
data_iterator + table_size);
});
std::transform(
column_wrappers.begin(), column_wrappers.end(), columns.begin(), [](auto const& col) {
return static_cast<cudf::column_view>(col);
});

cudf::table_view table{columns};
auto const n_cols = reuse_columns ? 1 : tree_levels + 1;
auto const source_table = create_sequence_table(
cycle_dtypes({cudf::type_to_id<key_type>()}, n_cols), row_count{table_size});
cudf::table_view table{*source_table};

// Execute benchmark
for (auto _ : state) {
cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0
// Execute tree that chains additions like (((a + b) + c) + d)
auto const op = cudf::binary_operator::ADD;
auto result_data_type = cudf::data_type(cudf::type_to_id<key_type>());
auto const op = cudf::binary_operator::ADD;
auto const result_data_type = cudf::data_type(cudf::type_to_id<key_type>());
if (reuse_columns) {
auto result = cudf::binary_operation(columns.at(0), columns.at(0), op, result_data_type);
auto result = cudf::binary_operation(table.column(0), table.column(0), op, result_data_type);
for (cudf::size_type i = 0; i < tree_levels - 1; i++) {
result = cudf::binary_operation(result->view(), columns.at(0), op, result_data_type);
result = cudf::binary_operation(result->view(), table.column(0), op, result_data_type);
}
} else {
auto result = cudf::binary_operation(columns.at(0), columns.at(1), op, result_data_type);
std::for_each(std::next(columns.cbegin(), 2), columns.cend(), [&](auto const& col) {
auto result = cudf::binary_operation(table.column(0), table.column(1), op, result_data_type);
std::for_each(std::next(table.begin(), 2), table.end(), [&](auto const& col) {
result = cudf::binary_operation(result->view(), col, op, result_data_type);
});
}
Expand Down
22 changes: 9 additions & 13 deletions cpp/benchmarks/binaryop/compiled_binaryop.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,30 +14,26 @@
* limitations under the License.
*/

#include <fixture/benchmark_fixture.hpp>
#include <fixture/templated_benchmark_fixture.hpp>
#include <synchronization/synchronization.hpp>

#include <cudf_test/column_wrapper.hpp>
#include <benchmarks/common/generate_input.hpp>
#include <benchmarks/fixture/benchmark_fixture.hpp>
#include <benchmarks/synchronization/synchronization.hpp>

#include <cudf/binaryop.hpp>

#include <thrust/iterator/counting_iterator.h>

// Benchmark fixture for the compiled binary-operation benchmarks; it adds no
// members of its own on top of cudf::benchmark.
class COMPILED_BINARYOP : public cudf::benchmark {
};

template <typename TypeLhs, typename TypeRhs, typename TypeOut>
void BM_compiled_binaryop(benchmark::State& state, cudf::binary_operator binop)
{
const cudf::size_type column_size{(cudf::size_type)state.range(0)};
auto const column_size{static_cast<cudf::size_type>(state.range(0))};

auto const source_table = create_random_table(
{cudf::type_to_id<TypeLhs>(), cudf::type_to_id<TypeRhs>()}, row_count{column_size});

auto data_it = thrust::make_counting_iterator(0);
cudf::test::fixed_width_column_wrapper<TypeLhs> input1(data_it, data_it + column_size);
cudf::test::fixed_width_column_wrapper<TypeRhs> input2(data_it, data_it + column_size);
auto lhs = cudf::column_view(source_table->get_column(0));
auto rhs = cudf::column_view(source_table->get_column(1));

auto lhs = cudf::column_view(input1);
auto rhs = cudf::column_view(input2);
auto output_dtype = cudf::data_type(cudf::type_to_id<TypeOut>());

// Call once for hot cache.
Expand Down
Loading

0 comments on commit 13c776a

Please sign in to comment.