Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/branch-21.10' into groupby-structs
Browse files Browse the repository at this point in the history
  • Loading branch information
mythrocks committed Aug 26, 2021
2 parents e33edf6 + 0ad36ff commit 23e1ad2
Show file tree
Hide file tree
Showing 97 changed files with 4,235 additions and 1,305 deletions.
2 changes: 1 addition & 1 deletion ci/gpu/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ gpuci_mamba_retry install -y \
"rapids-notebook-env=$MINOR_VERSION.*" \
"dask-cuda=${MINOR_VERSION}" \
"rmm=$MINOR_VERSION.*" \
"ucx-py=0.21.*"
"ucx-py=0.22.*"

# https://docs.rapids.ai/maintainers/depmgmt/
# gpuci_mamba_retry remove --force rapids-build-env rapids-notebook-env
Expand Down
2 changes: 1 addition & 1 deletion ci/gpu/java.sh
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ gpuci_conda_retry install -y \
"rapids-notebook-env=$MINOR_VERSION.*" \
"dask-cuda=${MINOR_VERSION}" \
"rmm=$MINOR_VERSION.*" \
"ucx-py=0.21.*" \
"ucx-py=0.22.*" \
"openjdk=8.*" \
"maven"

Expand Down
10 changes: 8 additions & 2 deletions conda/recipes/libcudf/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -118,19 +118,25 @@ test:
- test -f $PREFIX/include/cudf/hashing.hpp
- test -f $PREFIX/include/cudf/interop.hpp
- test -f $PREFIX/include/cudf/io/avro.hpp
- test -f $PREFIX/include/cudf/io/csv.hpp
- test -f $PREFIX/include/cudf/io/data_sink.hpp
- test -f $PREFIX/include/cudf/io/datasource.hpp
- test -f $PREFIX/include/cudf/io/orc_metadata.hpp
- test -f $PREFIX/include/cudf/io/csv.hpp
- test -f $PREFIX/include/cudf/io/detail/avro.hpp
- test -f $PREFIX/include/cudf/io/detail/csv.hpp
- test -f $PREFIX/include/cudf/io/detail/json.hpp
- test -f $PREFIX/include/cudf/io/detail/orc.hpp
- test -f $PREFIX/include/cudf/io/detail/parquet.hpp
- test -f $PREFIX/include/cudf/io/detail/utils.hpp
- test -f $PREFIX/include/cudf/io/json.hpp
- test -f $PREFIX/include/cudf/io/orc_metadata.hpp
- test -f $PREFIX/include/cudf/io/orc.hpp
- test -f $PREFIX/include/cudf/io/parquet.hpp
- test -f $PREFIX/include/cudf/io/text/data_chunk_source_factories.hpp
- test -f $PREFIX/include/cudf/io/text/data_chunk_source.hpp
- test -f $PREFIX/include/cudf/io/text/detail/multistate.hpp
- test -f $PREFIX/include/cudf/io/text/detail/tile_state.hpp
- test -f $PREFIX/include/cudf/io/text/detail/trie.hpp
- test -f $PREFIX/include/cudf/io/text/multibyte_split.hpp
- test -f $PREFIX/include/cudf/io/types.hpp
- test -f $PREFIX/include/cudf/ipc.hpp
- test -f $PREFIX/include/cudf/join.hpp
Expand Down
4 changes: 3 additions & 1 deletion cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -307,6 +307,7 @@ add_library(cudf
src/io/parquet/writer_impl.cu
src/io/statistics/orc_column_statistics.cu
src/io/statistics/parquet_column_statistics.cu
src/io/text/multibyte_split.cu
src/io/utilities/column_buffer.cpp
src/io/utilities/data_sink.cpp
src/io/utilities/datasource.cpp
Expand Down Expand Up @@ -368,8 +369,9 @@ add_library(cudf
src/reshape/interleave_columns.cu
src/reshape/tile.cu
src/rolling/grouped_rolling.cu
src/rolling/rolling.cu
src/rolling/range_window_bounds.cpp
src/rolling/rolling.cu
src/rolling/rolling_collect_list.cu
src/round/round.cu
src/scalar/scalar.cpp
src/scalar/scalar_factories.cpp
Expand Down
5 changes: 5 additions & 0 deletions cpp/benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -245,3 +245,8 @@ ConfigureBench(STRINGS_BENCH
# - json benchmark -------------------------------------------------------------------
ConfigureBench(JSON_BENCH
string/json_benchmark.cpp)

###################################################################################################
# - io benchmark ---------------------------------------------------------------------
ConfigureBench(MULTIBYTE_SPLIT_BENCHMARK
io/text/multibyte_split_benchmark.cpp)
8 changes: 4 additions & 4 deletions cpp/benchmarks/binaryop/binaryop_benchmark.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -74,14 +74,14 @@ static void BM_binaryop_transform(benchmark::State& state)
auto const op = cudf::binary_operator::ADD;
auto result_data_type = cudf::data_type(cudf::type_to_id<key_type>());
if (reuse_columns) {
auto result = cudf::binary_operation(columns.at(0), columns.at(0), op, result_data_type);
auto result = cudf::jit::binary_operation(columns.at(0), columns.at(0), op, result_data_type);
for (cudf::size_type i = 0; i < tree_levels - 1; i++) {
result = cudf::binary_operation(result->view(), columns.at(0), op, result_data_type);
result = cudf::jit::binary_operation(result->view(), columns.at(0), op, result_data_type);
}
} else {
auto result = cudf::binary_operation(columns.at(0), columns.at(1), op, result_data_type);
auto result = cudf::jit::binary_operation(columns.at(0), columns.at(1), op, result_data_type);
std::for_each(std::next(columns.cbegin(), 2), columns.cend(), [&](auto const& col) {
result = cudf::binary_operation(result->view(), col, op, result_data_type);
result = cudf::jit::binary_operation(result->view(), col, op, result_data_type);
});
}
}
Expand Down
4 changes: 2 additions & 2 deletions cpp/benchmarks/binaryop/compiled_binaryop_benchmark.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,11 +41,11 @@ void BM_compiled_binaryop(benchmark::State& state, cudf::binary_operator binop)
auto output_dtype = cudf::data_type(cudf::type_to_id<TypeOut>());

// Call once for hot cache.
cudf::experimental::binary_operation(lhs, rhs, binop, output_dtype);
cudf::binary_operation(lhs, rhs, binop, output_dtype);

for (auto _ : state) {
cuda_event_timer timer(state, true);
cudf::experimental::binary_operation(lhs, rhs, binop, output_dtype);
cudf::binary_operation(lhs, rhs, binop, output_dtype);
}
}

Expand Down
4 changes: 2 additions & 2 deletions cpp/benchmarks/binaryop/jit_binaryop_benchmark.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,11 +41,11 @@ void BM_binaryop(benchmark::State& state, cudf::binary_operator binop)
auto output_dtype = cudf::data_type(cudf::type_to_id<TypeOut>());

// Call once for hot cache.
cudf::binary_operation(lhs, rhs, binop, output_dtype);
cudf::jit::binary_operation(lhs, rhs, binop, output_dtype);

for (auto _ : state) {
cuda_event_timer timer(state, true);
cudf::binary_operation(lhs, rhs, binop, output_dtype);
cudf::jit::binary_operation(lhs, rhs, binop, output_dtype);
}
}

Expand Down
2 changes: 2 additions & 0 deletions cpp/benchmarks/io/cuio_benchmark_common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ using cudf::io::io_type;
benchmark(name##_buffer_output, type_or_group, static_cast<uint32_t>(io_type::HOST_BUFFER)); \
benchmark(name##_void_output, type_or_group, static_cast<uint32_t>(io_type::VOID));

std::string random_file_in_dir(std::string const& dir_path);

/**
* @brief Class to create a coupled `source_info` and `sink_info` of given type.
*/
Expand Down
164 changes: 164 additions & 0 deletions cpp/benchmarks/io/text/multibyte_split_benchmark.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <benchmarks/common/generate_benchmark_input.hpp>
#include <benchmarks/fixture/benchmark_fixture.hpp>
#include <benchmarks/io/cuio_benchmark_common.hpp>
#include <benchmarks/synchronization/synchronization.hpp>

#include <cudf_test/column_wrapper.hpp>

#include <cudf_test/file_utilities.hpp>

#include <cudf/io/text/data_chunk_source_factories.hpp>
#include <cudf/io/text/multibyte_split.hpp>
#include <cudf/scalar/scalar_factories.hpp>
#include <cudf/strings/combine.hpp>
#include <cudf/types.hpp>

#include <rmm/cuda_stream_view.hpp>

#include <thrust/transform.h>

#include <cstdio>
#include <fstream>
#include <memory>

using cudf::test::fixed_width_column_wrapper;

// Temporary directory object shared by this benchmark file. `BM_multibyte_split` creates its
// input file at a random path under `temp_dir.path()`.
// NOTE(review): presumably the directory is removed when this object is destroyed -- confirm
// against `temp_directory` in cudf_test/file_utilities.hpp.
temp_directory const temp_dir("cudf_gbench");

// Selects which kind of `data_chunk_source` the benchmark reads its input from.
// Deliberately an unscoped enum: the enumerators are passed as integer benchmark arguments and
// converted back with `static_cast` inside the benchmark body.
enum data_chunk_source_type { device = 0, file = 1, host = 2 };

/**
 * @brief Builds a device-resident string of random values, each followed by `delim`.
 *
 * One random string row is generated per delimiter, `delim` is appended to every row, and the
 * character data of the resulting strings column is returned as a single string scalar.
 *
 * @param num_chars Approximate total number of characters to generate.
 * @param delim_factor Fraction of `num_chars` that should consist of delimiter characters.
 * @param deviation Relative spread of the value sizes around their average.
 * @param delim Delimiter appended to every generated value; must not be empty.
 * @return String scalar owning the character data of the concatenated column.
 */
static cudf::string_scalar create_random_input(int32_t num_chars,
                                               double delim_factor,
                                               double deviation,
                                               std::string delim)
{
  CUDF_EXPECTS(not delim.empty(), "delimiter must not be empty");

  auto const num_delims = static_cast<int32_t>((num_chars * delim_factor) / delim.size());

  // One row is generated per delimiter; zero rows would make the average below divide by zero.
  CUDF_EXPECTS(num_delims > 0, "input is too small to contain a single delimiter");

  auto const num_delim_chars = num_delims * delim.size();
  auto const num_value_chars = num_chars - num_delim_chars;
  auto const num_rows        = num_delims;
  auto const value_size_avg  = static_cast<int32_t>(num_value_chars / num_rows);
  auto const value_size_min  = static_cast<int32_t>(value_size_avg * (1 - deviation));
  auto const value_size_max  = static_cast<int32_t>(value_size_avg * (1 + deviation));

  data_profile table_profile;

  // STRING columns use a NORMAL distribution parameterized by the min/max value sizes.
  table_profile.set_distribution_params(  //
    cudf::type_id::STRING,
    distribution_id::NORMAL,
    value_size_min,
    value_size_max);

  auto const values_table = create_random_table(  //
    {cudf::type_id::STRING},
    1,
    row_count{num_rows},
    table_profile);

  // Append the delimiter to every random value.
  auto delim_scalar  = cudf::make_string_scalar(delim);
  auto delims_column = cudf::make_column_from_scalar(*delim_scalar, num_rows);
  auto input_table = cudf::table_view({values_table->get_column(0).view(), delims_column->view()});
  auto input_column = cudf::strings::concatenate(input_table);

  // Extract the chars child from the returned strings column.
  auto input_column_contents = input_column->release();
  auto chars_column_contents = input_column_contents.children[1]->release();

  // Move the buffer out while `chars_column_contents.data` keeps ownership of the (now
  // moved-from) heap object. Calling `release()` on the unique_ptr would leak that object.
  return cudf::string_scalar(std::move(*chars_column_contents.data));
}

/**
 * @brief Benchmark body for `cudf::io::text::multibyte_split`.
 *
 * Benchmark arguments, read from `state.range`:
 *  - 0: the `data_chunk_source_type` the input is read from
 *  - 1: delimiter size in characters (at least 1)
 *  - 2: percent of the input made up of delimiter characters, in [1, 50]
 *  - 3: approximate input size in characters
 *
 * The same generated input is made available as a device scalar, a host string and a temporary
 * file; the source selected by argument 0 is the one that gets timed.
 */
static void BM_multibyte_split(benchmark::State& state)
{
  auto const source_type      = static_cast<data_chunk_source_type>(state.range(0));
  auto const delim_size       = state.range(1);
  auto const delim_percent    = state.range(2);
  auto const file_size_approx = state.range(3);

  CUDF_EXPECTS(delim_size >= 1, "delimiter size must be at least 1");
  CUDF_EXPECTS(delim_percent >= 1, "delimiter percent must be at least 1");
  CUDF_EXPECTS(delim_percent <= 50, "delimiter percent must be at most 50");

  // Use the (count, char) constructor: it repeats ':' `delim_size` times. The
  // (char const*, count) overload would read `delim_size` bytes from the two-byte literal ":",
  // which is an out-of-bounds read for any size above 2.
  auto const delim = std::string(static_cast<std::string::size_type>(delim_size), ':');

  auto const delim_factor = static_cast<double>(delim_percent) / 100;
  auto device_input =
    create_random_input(static_cast<int32_t>(file_size_approx), delim_factor, 0.05, delim);
  auto host_input = thrust::host_vector<char>(device_input.size());

  cudaMemcpyAsync(host_input.data(),
                  device_input.data(),
                  device_input.size() * sizeof(char),
                  cudaMemcpyDeviceToHost,
                  rmm::cuda_stream_default);

  // The host copies below must only be taken once the device-to-host transfer has finished.
  cudaDeviceSynchronize();

  auto host_string = std::string(host_input.data(), host_input.size());

  auto temp_file_name = random_file_in_dir(temp_dir.path());

  {
    // Scoped so the stream is flushed and closed before the file is used as a source.
    auto temp_fostream = std::ofstream(temp_file_name, std::ofstream::out);
    temp_fostream.write(host_input.data(), host_input.size());
  }

  auto source = std::unique_ptr<cudf::io::text::data_chunk_source>(nullptr);

  switch (source_type) {
    case data_chunk_source_type::file:  //
      source = cudf::io::text::make_source_from_file(temp_file_name);
      break;
    case data_chunk_source_type::host:  //
      source = cudf::io::text::make_source(host_string);
      break;
    case data_chunk_source_type::device:  //
      source = cudf::io::text::make_source(device_input);
      break;
    default: CUDF_FAIL("unsupported data_chunk_source_type");
  }

  for (auto _ : state) {
    cuda_event_timer raii(state, true);
    auto output = cudf::io::text::multibyte_split(*source, delim);
  }

  state.SetBytesProcessed(state.iterations() * device_input.size());
}

// Fixture type under which the multibyte_split benchmarks are defined and registered.
// It adds nothing of its own on top of `cudf::benchmark`.
class MultibyteSplitBenchmark : public cudf::benchmark {};

// Defines a `MultibyteSplitBenchmark` fixture benchmark called `name` that runs
// `BM_multibyte_split`, and registers it over the cartesian product of:
//   - every `data_chunk_source_type`
//   - delimiter sizes {1, 4, 7}
//   - delimiter percentages {1, 25}
//   - approximate input sizes {1 << 15, 1 << 30}
// Timing is reported manually (by the timer inside the benchmark body), in milliseconds.
#define MULTIBYTE_SPLIT_BM_BENCHMARK_DEFINE(name)                               \
  BENCHMARK_DEFINE_F(MultibyteSplitBenchmark, name)(::benchmark::State & state) \
  {                                                                             \
    BM_multibyte_split(state);                                                  \
  }                                                                             \
  BENCHMARK_REGISTER_F(MultibyteSplitBenchmark, name)                           \
    ->ArgsProduct({{data_chunk_source_type::device,                             \
                    data_chunk_source_type::file,                               \
                    data_chunk_source_type::host},                              \
                   {1, 4, 7},                                                   \
                   {1, 25},                                                     \
                   {1 << 15, 1 << 30}})                                         \
    ->UseManualTime()                                                           \
    ->Unit(::benchmark::kMillisecond);

MULTIBYTE_SPLIT_BM_BENCHMARK_DEFINE(multibyte_split_simple);
Loading

0 comments on commit 23e1ad2

Please sign in to comment.