Skip to content

Commit

Permalink
Merge branch 'branch-0.19' into fix-7111-is-categorical-dtype-dispatch
Browse files Browse the repository at this point in the history
  • Loading branch information
Keith Kraus authored Mar 30, 2021
2 parents d6240ce + 7d49f75 commit 22ca8f3
Show file tree
Hide file tree
Showing 109 changed files with 5,868 additions and 3,933 deletions.
2 changes: 1 addition & 1 deletion conda/recipes/cudf/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ requirements:
- numba >=0.49.0
- dlpack
- pyarrow 1.0.1
- libcudf {{ version }}
- libcudf {{ version }}=*_{{ GIT_DESCRIBE_NUMBER }}
- rmm {{ minor_version }}
- cudatoolkit {{ cuda_version }}
run:
Expand Down
8 changes: 4 additions & 4 deletions conda/recipes/cudf_kafka/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,12 @@ requirements:
- python
- cython >=0.29,<0.30
- setuptools
- cudf {{ version }}
- libcudf_kafka {{ version }}
- cudf {{ version }}=*_{{ GIT_DESCRIBE_NUMBER }}
- libcudf_kafka {{ version }}=*_{{ GIT_DESCRIBE_NUMBER }}
run:
- libcudf_kafka {{ version }}
- libcudf_kafka {{ version }}=*_{{ GIT_DESCRIBE_NUMBER }}
- python-confluent-kafka
- cudf {{ version }}
- cudf {{ version }}=*_{{ GIT_DESCRIBE_NUMBER }}

test:
requires:
Expand Down
8 changes: 4 additions & 4 deletions conda/recipes/custreamz/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,15 +23,15 @@ requirements:
host:
- python
- python-confluent-kafka
- cudf_kafka {{ version }}
- cudf_kafka {{ version }}=*_{{ GIT_DESCRIBE_NUMBER }}
run:
- python
- streamz
- cudf {{ version }}
- streamz
- cudf {{ version }}=*_{{ GIT_DESCRIBE_NUMBER }}
- dask >=2.22.0
- distributed >=2.22.0
- python-confluent-kafka
- cudf_kafka {{ version }}
- cudf_kafka {{ version }}=*_{{ GIT_DESCRIBE_NUMBER }}

test:
requires:
Expand Down
6 changes: 3 additions & 3 deletions conda/recipes/dask-cudf/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,15 +22,15 @@ build:
requirements:
host:
- python
- cudf {{ version }}
- cudf {{ version }}=*_{{ GIT_DESCRIBE_NUMBER }}
- dask>=2021.3.1
- distributed >=2.22.0
run:
- python
- cudf {{ version }}
- cudf {{ version }}=*_{{ GIT_DESCRIBE_NUMBER }}
- dask>=2021.3.1
- distributed >=2.22.0

test:
requires:
- cudatoolkit {{ cuda_version }}.*
Expand Down
2 changes: 1 addition & 1 deletion conda/recipes/libcudf/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ build:

requirements:
build:
- cmake >=3.17.0
- cmake >=3.18
host:
- librmm {{ minor_version }}.*
- cudatoolkit {{ cuda_version }}.*
Expand Down
2 changes: 1 addition & 1 deletion conda/recipes/libcudf_kafka/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ requirements:
build:
- cmake >=3.17.0
host:
- libcudf {{ version }}
- libcudf {{ version }}=*_{{ GIT_DESCRIBE_NUMBER }}
- librdkafka >=1.5.0,<1.5.3
run:
- {{ pin_compatible('librdkafka', max_pin='x.x') }} #TODO: librdkafka should be automatically included here by run_exports but is not
Expand Down
16 changes: 10 additions & 6 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -389,6 +389,7 @@ add_library(cudf
src/transform/jit/code/kernel.cpp
src/transform/mask_to_bools.cu
src/transform/nans_to_nulls.cu
src/transform/row_bit_count.cu
src/transform/transform.cpp
src/transpose/transpose.cu
src/unary/cast_ops.cu
Expand Down Expand Up @@ -554,12 +555,6 @@ if(CUDF_BUILD_BENCHMARKS)
GIT_SHALLOW TRUE
OPTIONS "BENCHMARK_ENABLE_TESTING OFF"
"BENCHMARK_ENABLE_INSTALL OFF")
if(benchmark_ADDED)
install(TARGETS benchmark
benchmark_main
DESTINATION lib
EXPORT cudf-targets)
endif()
add_subdirectory(benchmarks)
endif()

Expand Down Expand Up @@ -636,6 +631,15 @@ elseif(TARGET arrow_static)
endif()
endif()

if(TARGET gtest)
get_target_property(gtest_is_imported gtest IMPORTED)
if(NOT gtest_is_imported)
export(TARGETS gtest gmock gtest_main gmock_main
FILE ${CUDF_BINARY_DIR}/cudf-gtesting-targets.cmake
NAMESPACE GTest::)
endif()
endif()

export(EXPORT cudf-targets
FILE ${CUDF_BINARY_DIR}/cudf-targets.cmake
NAMESPACE cudf::)
Expand Down
6 changes: 4 additions & 2 deletions cpp/benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -175,10 +175,12 @@ ConfigureBench(BINARYOP_BENCH binaryop/binaryop_benchmark.cu)
###################################################################################################
# - nvtext benchmark -------------------------------------------------------------------
ConfigureBench(TEXT_BENCH
text/ngrams_benchmark.cpp
text/normalize_benchmark.cpp
text/normalize_spaces_benchmark.cpp
text/tokenize_benchmark.cpp
text/subword_benchmark.cpp)
text/replace_benchmark.cpp
text/subword_benchmark.cpp
text/tokenize_benchmark.cpp)

###################################################################################################
# - strings benchmark -------------------------------------------------------------------
Expand Down
8 changes: 2 additions & 6 deletions cpp/benchmarks/join/join_benchmark.cu
Original file line number Diff line number Diff line change
Expand Up @@ -105,12 +105,8 @@ static void BM_join(benchmark::State &state)
for (auto _ : state) {
cuda_event_timer raii(state, true, 0);

auto result = cudf::inner_join(probe_table,
build_table,
columns_to_join,
columns_to_join,
{{0, 0}},
cudf::null_equality::UNEQUAL);
auto result = cudf::inner_join(
probe_table, build_table, columns_to_join, columns_to_join, cudf::null_equality::UNEQUAL);
}
}

Expand Down
76 changes: 76 additions & 0 deletions cpp/benchmarks/text/ngrams_benchmark.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <benchmark/benchmark.h>
#include <benchmarks/common/generate_benchmark_input.hpp>
#include <benchmarks/fixture/benchmark_fixture.hpp>
#include <benchmarks/string/string_bench_args.hpp>
#include <benchmarks/synchronization/synchronization.hpp>

#include <cudf/scalar/scalar.hpp>
#include <cudf/strings/strings_column_view.hpp>
#include <cudf_test/base_fixture.hpp>

#include <nvtext/generate_ngrams.hpp>

// Benchmark fixture used by the n-gram benchmarks registered in this file.
// It adds no members of its own; everything comes from the cudf::benchmark base.
class TextNGrams : public cudf::benchmark {
};

// Selects which nvtext generator BM_ngrams times:
//   tokens     -> nvtext::generate_ngrams
//   characters -> nvtext::generate_character_ngrams
// The enumerator names are also pasted into benchmark names by NVTEXT_BENCHMARK_DEFINE.
enum class ngrams_type { tokens, characters };

/**
 * @brief Times one of the nvtext n-gram generators on a randomly generated strings column.
 *
 * `state.range(0)` is used as the number of rows of the generated table and
 * `state.range(1)` is forwarded, together with 0, to the STRING distribution
 * parameters (presumably the lower/upper bounds on string length -- confirm
 * against data_profile::set_distribution_params).
 *
 * @param state Google Benchmark state supplying the two range arguments and
 *              driving the timed loop.
 * @param nt    Which generator to invoke inside the timed loop.
 */
static void BM_ngrams(benchmark::State& state, ngrams_type nt)
{
  auto const row_total  = static_cast<cudf::size_type>(state.range(0));
  auto const length_cap = static_cast<cudf::size_type>(state.range(1));

  // Describe the random strings to generate.
  data_profile profile;
  profile.set_distribution_params(cudf::type_id::STRING, distribution_id::NORMAL, 0, length_cap);

  // Single STRING column table; the benchmark operates on its only column.
  auto const random_table =
    create_random_table({cudf::type_id::STRING}, 1, row_count{row_total}, profile);
  cudf::strings_column_view strings(random_table->view().column(0));

  for (auto _ : state) {
    // Scoped timer: the measured region is the remainder of this iteration.
    cuda_event_timer timer(state, true, 0);
    if (nt == ngrams_type::tokens) {
      nvtext::generate_ngrams(strings);
    } else if (nt == ngrams_type::characters) {
      nvtext::generate_character_ngrams(strings);
    }
  }

  // Report throughput in terms of the input's character bytes.
  state.SetBytesProcessed(state.iterations() * strings.chars_size());
}

/**
 * @brief Registers the (row count, row length) argument pairs for the n-gram benchmarks.
 *
 * Forwards fixed row-count and row-length bounds, each with its multiplier,
 * to generate_string_bench_args, which performs the actual registration on `b`.
 *
 * @param b Benchmark being configured.
 */
static void generate_bench_args(benchmark::internal::Benchmark* b)
{
  // Row-count sweep: 2^12 .. 2^24, multiplier 8.
  constexpr int rows_lo   = 1 << 12;
  constexpr int rows_hi   = 1 << 24;
  constexpr int rows_step = 8;

  // Row-length sweep: 5 .. 40, multiplier 2.
  constexpr int len_lo   = 5;
  constexpr int len_hi   = 40;
  constexpr int len_step = 2;

  generate_string_bench_args(b, rows_lo, rows_hi, rows_step, len_lo, len_hi, len_step);
}

// Defines and registers one TextNGrams fixture benchmark called `name`.
// `name` must also be an enumerator of ngrams_type: it is pasted into
// `ngrams_type::name` to pick the generator that BM_ngrams runs.
// Registration applies generate_bench_args for the argument sweep, switches the
// benchmark to manual timing, and reports results in milliseconds.
#define NVTEXT_BENCHMARK_DEFINE(name) \
BENCHMARK_DEFINE_F(TextNGrams, name) \
(::benchmark::State & st) { BM_ngrams(st, ngrams_type::name); } \
BENCHMARK_REGISTER_F(TextNGrams, name) \
->Apply(generate_bench_args) \
->UseManualTime() \
->Unit(benchmark::kMillisecond);

// One benchmark per ngrams_type enumerator.
NVTEXT_BENCHMARK_DEFINE(tokens)
NVTEXT_BENCHMARK_DEFINE(characters)
85 changes: 85 additions & 0 deletions cpp/benchmarks/text/replace_benchmark.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <benchmark/benchmark.h>
#include <benchmarks/fixture/benchmark_fixture.hpp>
#include <benchmarks/string/string_bench_args.hpp>
#include <benchmarks/synchronization/synchronization.hpp>

#include <cudf/strings/strings_column_view.hpp>
#include <cudf_test/base_fixture.hpp>
#include <cudf_test/column_wrapper.hpp>

#include <nvtext/replace.hpp>

// Benchmark fixture used by the token-replace benchmark registered in this file.
// It adds no members of its own; everything comes from the cudf::benchmark base.
class TextReplace : public cudf::benchmark {
};

/**
 * @brief Times nvtext::replace_tokens on a host-generated strings column.
 *
 * A single "row" string is built by concatenating randomly chosen words until
 * it is at least `state.range(1)` bytes long. Each of the `state.range(0)`
 * column rows is then that string starting at a random byte offset in [0, 16],
 * so rows differ slightly in length and leading content.
 *
 * NOTE(review): the offset is a byte offset and "sevén " contains a multi-byte
 * character, so a row could presumably start in the middle of that character
 * -- confirm whether that matters for this benchmark.
 *
 * @param state Google Benchmark state supplying the two range arguments and
 *              driving the timed loop.
 */
static void BM_replace(benchmark::State& state)
{
  auto const num_rows   = static_cast<cudf::size_type>(state.range(0));
  auto const row_length = static_cast<cudf::size_type>(state.range(1));

  // Vocabulary the row text is assembled from.
  std::vector<std::string> vocabulary{" ", "one ", "two ", "three ", "four ",
                                      "five ", "six ", "sevén ", "eight ", "nine ",
                                      "ten ", "eleven ", "twelve ", "thirteen ", "fourteen ",
                                      "fifteen ", "sixteen ", "seventeen ", "eighteen ", "nineteen "};

  // Default-constructed engine: the generated data is the same on every run.
  std::default_random_engine rng;
  std::uniform_int_distribution<int> pick_word(0, vocabulary.size() - 1);

  // Append random words until the requested length is reached.
  std::string line;
  while (static_cast<int>(line.size()) < row_length) {
    line += vocabulary[pick_word(rng)];
  }

  std::uniform_int_distribution<int> pick_offset(0, 16);

  // Every row is a suffix of `line` beginning at a random offset.
  auto row_begin = cudf::detail::make_counting_transform_iterator(
    0, [&](auto /*row_index*/) { return line.c_str() + pick_offset(rng); });
  cudf::test::strings_column_wrapper input_column(row_begin, row_begin + num_rows);
  cudf::strings_column_view input_view(input_column);

  // "zero" never appears in the vocabulary, so that target has nothing to match.
  cudf::test::strings_column_wrapper targets({"one", "two", "sevén", "zero"});
  cudf::test::strings_column_wrapper replacements({"1", "2", "7", "0"});

  for (auto _ : state) {
    // Scoped timer: the measured region is the remainder of this iteration.
    cuda_event_timer timer(state, true, 0);
    nvtext::replace_tokens(input_view,
                           cudf::strings_column_view(targets),
                           cudf::strings_column_view(replacements));
  }

  // Report throughput in terms of the input's character bytes.
  state.SetBytesProcessed(state.iterations() * input_view.chars_size());
}

/**
 * @brief Registers the (row count, row length) argument pairs for the replace benchmark.
 *
 * Forwards fixed row-count and row-length bounds, each with its multiplier,
 * to generate_string_bench_args, which performs the actual registration on `b`.
 *
 * @param b Benchmark being configured.
 */
static void generate_bench_args(benchmark::internal::Benchmark* b)
{
  // Row-count sweep: 2^12 .. 2^24, multiplier 8.
  constexpr int rows_lo   = 1 << 12;
  constexpr int rows_hi   = 1 << 24;
  constexpr int rows_step = 8;

  // Row-length sweep: 2^5 .. 2^13, multiplier 4.
  constexpr int len_lo   = 1 << 5;
  constexpr int len_hi   = 1 << 13;
  constexpr int len_step = 4;

  generate_string_bench_args(b, rows_lo, rows_hi, rows_step, len_lo, len_hi, len_step);
}

// Defines and registers one TextReplace fixture benchmark called `name`.
// The body simply forwards the benchmark state to BM_replace; `name` only
// affects the benchmark's registered name.
// Registration applies generate_bench_args for the argument sweep, switches the
// benchmark to manual timing, and reports results in milliseconds.
#define NVTEXT_BENCHMARK_DEFINE(name) \
BENCHMARK_DEFINE_F(TextReplace, name) \
(::benchmark::State & st) { BM_replace(st); } \
BENCHMARK_REGISTER_F(TextReplace, name) \
->Apply(generate_bench_args) \
->UseManualTime() \
->Unit(benchmark::kMillisecond);

// The single benchmark instance for this file.
NVTEXT_BENCHMARK_DEFINE(replace)
17 changes: 15 additions & 2 deletions cpp/benchmarks/type_dispatcher/type_dispatcher_benchmark.cu
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@

#include <cudf/detail/utilities/cuda.cuh>

#include <cudf/utilities/traits.hpp>
#include <random>
#include <type_traits>
#include "../fixture/benchmark_fixture.hpp"
Expand Down Expand Up @@ -87,14 +88,20 @@ __global__ void host_dispatching_kernel(mutable_column_device_view source_column

// Host-side functor with one call operator per column element type.
// Two overloads are selected by cudf::is_rep_layout_compatible<ColumnType>():
//  - compatible types launch host_dispatching_kernel on the column;
//  - all other types fail with CUDF_FAIL.
// NOTE(review): `work_per_thread` is not used by either overload shown here.
template <FunctorType functor_type>
struct ColumnHandle {
template <typename ColumnType>
template <typename ColumnType, CUDF_ENABLE_IF(cudf::is_rep_layout_compatible<ColumnType>())>
void operator()(mutable_column_device_view source_column, int work_per_thread)
{
// Size the launch from the column's row count and the file-level block_size.
cudf::detail::grid_1d grid_config{source_column.size(), block_size};
int grid_size = grid_config.num_blocks;
// Launch the kernel.
// NOTE(review): no launch-error check follows this launch -- confirm whether one is wanted.
host_dispatching_kernel<functor_type, ColumnType><<<grid_size, block_size>>>(source_column);
}

// Fallback for types that are not rep-layout-compatible: always raises via CUDF_FAIL.
template <typename ColumnType, CUDF_ENABLE_IF(not cudf::is_rep_layout_compatible<ColumnType>())>
void operator()(mutable_column_device_view source_column, int work_per_thread)
{
CUDF_FAIL("Invalid type to benchmark.");
}
};

// The following is for DEVICE_DISPATCHING:
Expand All @@ -104,12 +111,18 @@ struct ColumnHandle {
// n_rows * n_cols.
// Device-side functor with one call operator per element type T.
// Two overloads are selected by cudf::is_rep_layout_compatible<T>():
//  - compatible types update one element of `source` in place;
//  - all other types trip a device assertion.
template <FunctorType functor_type>
struct RowHandle {
template <typename T>
template <typename T, CUDF_ENABLE_IF(cudf::is_rep_layout_compatible<T>())>
__device__ void operator()(mutable_column_device_view source, cudf::size_type index)
{
// Replace element `index` with Functor<T, functor_type>::f applied to its current value.
using F = Functor<T, functor_type>;
source.data<T>()[index] = F::f(source.data<T>()[index]);
}

// Fallback for types that are not rep-layout-compatible: asserts unconditionally.
template <typename T, CUDF_ENABLE_IF(not cudf::is_rep_layout_compatible<T>())>
__device__ void operator()(mutable_column_device_view source, cudf::size_type index)
{
cudf_assert(false && "Unsupported type.");
}
};

// This is for DEVICE_DISPATCHING
Expand Down
Loading

0 comments on commit 22ca8f3

Please sign in to comment.