Merge branch 'branch-23.06' into parquet_level_optimization
nvdbaranec committed May 15, 2023
2 parents 8804007 + 4fe3e38 commit 8bbbab1
Showing 70 changed files with 1,125 additions and 581 deletions.
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
@@ -44,7 +44,7 @@ repos:
       hooks:
         - id: cython-lint
   - repo: https://github.com/pre-commit/mirrors-mypy
-    rev: 'v0.971'
+    rev: 'v1.3.0'
     hooks:
       - id: mypy
         additional_dependencies: [types-cachetools]
4 changes: 2 additions & 2 deletions conda/environments/all_cuda-118_arch-x86_64.yaml
@@ -31,8 +31,8 @@ dependencies:
   - fmt>=9.1.0,<10
   - fsspec>=0.6.0
   - gcc_linux-64=11.*
-  - gmock>=1.13.0.*
-  - gtest>=1.13.0.*
+  - gmock>=1.13.0
+  - gtest>=1.13.0
   - hypothesis
   - ipython
   - libarrow==11.0.0.*
2 changes: 2 additions & 0 deletions conda/recipes/libcudf/meta.yaml
@@ -396,6 +396,8 @@ outputs:
             - {{ pin_subpackage('libcudf', exact=True) }}
             - {{ pin_subpackage('libcudf_kafka', exact=True) }}
             - cudatoolkit {{ cuda_spec }}
+            - gtest {{ gtest_version }}
+            - gmock {{ gtest_version }}
             - libcurand {{ libcurand_run_version }}
     about:
       home: https://rapids.ai/
13 changes: 7 additions & 6 deletions cpp/benchmarks/CMakeLists.txt
@@ -153,8 +153,8 @@ ConfigureBench(APPLY_BOOLEAN_MASK_BENCH stream_compaction/apply_boolean_mask.cpp
 # ##################################################################################################
 # * stream_compaction benchmark -------------------------------------------------------------------
 ConfigureNVBench(
-  STREAM_COMPACTION_NVBENCH stream_compaction/distinct.cpp stream_compaction/unique.cpp
-  stream_compaction/unique_count.cpp
+  STREAM_COMPACTION_NVBENCH stream_compaction/distinct.cpp stream_compaction/distinct_count.cpp
+  stream_compaction/unique.cpp stream_compaction/unique_count.cpp
 )

 # ##################################################################################################
@@ -195,8 +195,7 @@ ConfigureBench(
   reduction/reduce.cpp reduction/scan.cpp
 )
 ConfigureNVBench(
-  REDUCTION_NVBENCH reduction/distinct_count.cpp reduction/rank.cpp reduction/scan_structs.cpp
-  reduction/segmented_reduce.cpp
+  REDUCTION_NVBENCH reduction/rank.cpp reduction/scan_structs.cpp reduction/segmented_reduce.cpp
 )

 # ##################################################################################################
@@ -280,7 +279,6 @@ ConfigureNVBench(TEXT_NVBENCH text/minhash.cpp)
 # * strings benchmark -------------------------------------------------------------------
 ConfigureBench(
   STRINGS_BENCH
-  string/case.cpp
   string/combine.cpp
   string/contains.cpp
   string/convert_datetime.cpp
@@ -301,7 +299,10 @@ ConfigureBench(
   string/url_decode.cu
 )

-ConfigureNVBench(STRINGS_NVBENCH string/like.cpp string/reverse.cpp string/lengths.cpp)
+ConfigureNVBench(
+  STRINGS_NVBENCH string/case.cpp string/char_types.cpp string/lengths.cpp string/like.cpp
+  string/reverse.cpp
+)

 # ##################################################################################################
 # * json benchmark -------------------------------------------------------------------
cpp/benchmarks/stream_compaction/distinct_count.cpp (renamed from cpp/benchmarks/reduction/distinct_count.cpp; see the CMake change above)
@@ -15,14 +15,14 @@
  */

 #include <benchmarks/common/generate_input.hpp>
-#include <benchmarks/fixture/rmm_pool_raii.hpp>
+#include <benchmarks/fixture/benchmark_fixture.hpp>

-#include <cudf/detail/stream_compaction.hpp>
+#include <cudf/stream_compaction.hpp>

 #include <nvbench/nvbench.cuh>

 template <typename Type>
-static void bench_reduction_distinct_count(nvbench::state& state, nvbench::type_list<Type>)
+static void bench_distinct_count(nvbench::state& state, nvbench::type_list<Type>)
 {
   auto const dtype = cudf::type_to_id<Type>();
   auto const size  = static_cast<cudf::size_type>(state.get_int64("num_rows"));
@@ -40,16 +40,19 @@ static void bench_reduction_distinct_count(nvbench::state& state, nvbench::type_list<Type>)
   auto const& data_column = data_table->get_column(0);
   auto const input_table  = cudf::table_view{{data_column, data_column, data_column}};

+  auto mem_stats_logger = cudf::memory_stats_logger();  // init stats logger
+  state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value()));
   state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
-    rmm::cuda_stream_view stream{launch.get_stream()};
-    cudf::detail::distinct_count(input_table, cudf::null_equality::EQUAL, stream);
+    cudf::distinct_count(input_table, cudf::null_equality::EQUAL);
   });
+  state.add_buffer_size(
+    mem_stats_logger.peak_memory_usage(), "peak_memory_usage", "peak_memory_usage");
 }

 using data_type = nvbench::type_list<int32_t, int64_t, float, double>;

-NVBENCH_BENCH_TYPES(bench_reduction_distinct_count, NVBENCH_TYPE_AXES(data_type))
-  .set_name("reduction_distinct_count")
+NVBENCH_BENCH_TYPES(bench_distinct_count, NVBENCH_TYPE_AXES(data_type))
+  .set_name("distinct_count")
   .add_int64_axis("num_rows",
                   {
                     10000,  // 10k
70 changes: 49 additions & 21 deletions cpp/benchmarks/string/case.cpp
@@ -15,36 +15,64 @@
  */

 #include <benchmarks/common/generate_input.hpp>
-#include <benchmarks/fixture/benchmark_fixture.hpp>
-#include <benchmarks/synchronization/synchronization.hpp>
+#include <benchmarks/fixture/rmm_pool_raii.hpp>

 #include <cudf/strings/case.hpp>
 #include <cudf/strings/strings_column_view.hpp>
 #include <cudf/utilities/default_stream.hpp>

-class StringCase : public cudf::benchmark {};
+#include <nvbench/nvbench.cuh>

-static void BM_case(benchmark::State& state)
+void bench_case(nvbench::state& state)
 {
-  cudf::size_type const n_rows{(cudf::size_type)state.range(0)};
-  auto const column = create_random_column(cudf::type_id::STRING, row_count{n_rows});
-  cudf::strings_column_view input(column->view());
+  auto const n_rows    = static_cast<cudf::size_type>(state.get_int64("num_rows"));
+  auto const max_width = static_cast<int32_t>(state.get_int64("width"));
+  auto const encoding  = state.get_string("encoding");

-  for (auto _ : state) {
-    cuda_event_timer raii(state, true, cudf::get_default_stream());
-    cudf::strings::to_lower(input);
+  if (static_cast<std::size_t>(n_rows) * static_cast<std::size_t>(max_width) >=
+      static_cast<std::size_t>(std::numeric_limits<cudf::size_type>::max())) {
+    state.skip("Skip benchmarks greater than size_type limit");
   }

-  state.SetBytesProcessed(state.iterations() * input.chars_size());
-}
-
-#define SORT_BENCHMARK_DEFINE(name)          \
-  BENCHMARK_DEFINE_F(StringCase, name)       \
-  (::benchmark::State & st) { BM_case(st); } \
-  BENCHMARK_REGISTER_F(StringCase, name)     \
-    ->RangeMultiplier(8)                     \
-    ->Ranges({{1 << 12, 1 << 24}})           \
-    ->UseManualTime()                        \
-    ->Unit(benchmark::kMillisecond);
-
-SORT_BENCHMARK_DEFINE(to_lower)
+  data_profile const profile = data_profile_builder().distribution(
+    cudf::type_id::STRING, distribution_id::NORMAL, 0, max_width);
+  auto const column = create_random_column(cudf::type_id::STRING, row_count{n_rows}, profile);
+
+  auto col_view = column->view();
+
+  cudf::column::contents ascii_contents;
+  if (encoding == "ascii") {
+    data_profile ascii_profile = data_profile_builder().no_validity().distribution(
+      cudf::type_id::INT8, distribution_id::UNIFORM, 32, 126);  // nice ASCII range
+    auto input = cudf::strings_column_view(col_view);
+    auto ascii_column =
+      create_random_column(cudf::type_id::INT8, row_count{input.chars_size()}, ascii_profile);
+    auto ascii_data = ascii_column->view();
+
+    col_view = cudf::column_view(col_view.type(),
+                                 col_view.size(),
+                                 nullptr,
+                                 col_view.null_mask(),
+                                 col_view.null_count(),
+                                 0,
+                                 {input.offsets(), ascii_data});
+
+    ascii_contents = ascii_column->release();
+  }
+  auto input = cudf::strings_column_view(col_view);
+
+  state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value()));
+
+  state.add_element_count(input.chars_size(), "chars_size");
+  state.add_global_memory_reads<nvbench::int8_t>(input.chars_size());
+  state.add_global_memory_writes<nvbench::int8_t>(input.chars_size());
+
+  state.exec(nvbench::exec_tag::sync,
+             [&](nvbench::launch& launch) { auto result = cudf::strings::to_lower(input); });
 }
+
+NVBENCH_BENCH(bench_case)
+  .set_name("strings_case")
+  .add_int64_axis("width", {32, 64, 128, 256, 512, 1024, 2048})
+  .add_int64_axis("num_rows", {4096, 32768, 262144, 2097152, 16777216})
+  .add_string_axis("encoding", {"ascii", "utf8"});
66 changes: 66 additions & 0 deletions cpp/benchmarks/string/char_types.cpp
@@ -0,0 +1,66 @@
/*
 * Copyright (c) 2023, NVIDIA CORPORATION.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <benchmarks/common/generate_input.hpp>

#include <cudf/strings/char_types/char_types.hpp>
#include <cudf/strings/strings_column_view.hpp>
#include <cudf/utilities/default_stream.hpp>

#include <nvbench/nvbench.cuh>

static void bench_char_types(nvbench::state& state)
{
  auto const num_rows  = static_cast<cudf::size_type>(state.get_int64("num_rows"));
  auto const row_width = static_cast<cudf::size_type>(state.get_int64("row_width"));
  auto const api_type  = state.get_string("api");

  if (static_cast<std::size_t>(num_rows) * static_cast<std::size_t>(row_width) >=
      static_cast<std::size_t>(std::numeric_limits<cudf::size_type>::max())) {
    state.skip("Skip benchmarks greater than size_type limit");
  }

  data_profile const table_profile = data_profile_builder().distribution(
    cudf::type_id::STRING, distribution_id::NORMAL, 0, row_width);
  auto const table =
    create_random_table({cudf::type_id::STRING}, row_count{num_rows}, table_profile);
  cudf::strings_column_view input(table->view().column(0));
  auto input_types = cudf::strings::string_character_types::SPACE;

  state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value()));
  // gather some throughput statistics as well
  auto chars_size = input.chars_size();
  state.add_global_memory_reads<nvbench::int8_t>(chars_size);  // all bytes are read
  if (api_type == "all") {
    state.add_global_memory_writes<nvbench::int8_t>(num_rows);  // output is a bool8 per row
  } else {
    state.add_global_memory_writes<nvbench::int8_t>(chars_size);
  }

  state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
    if (api_type == "all") {
      auto result = cudf::strings::all_characters_of_type(input, input_types);
    } else {
      auto result = cudf::strings::filter_characters_of_type(input, input_types);
    }
  });
}

NVBENCH_BENCH(bench_char_types)
  .set_name("char_types")
  .add_int64_axis("row_width", {32, 64, 128, 256, 512, 1024, 2048, 4096})
  .add_int64_axis("num_rows", {4096, 32768, 262144, 2097152, 16777216})
  .add_string_axis("api", {"all", "filter"});
5 changes: 5 additions & 0 deletions cpp/cmake/thirdparty/patches/nvbench_override.json
@@ -12,6 +12,11 @@
       "file" : "nvbench/use_existing_fmt.diff",
       "issue" : "Fix add support for using an existing fmt [https://github.com/NVIDIA/nvbench/pull/125]",
       "fixed_in" : ""
+    },
+    {
+      "file" : "nvbench/public_fmt_dep_in_conda.diff",
+      "issue" : "Propagate fmt requirement in conda envs [https://github.com/NVIDIA/nvbench/pull/127]",
+      "fixed_in" : ""
     }
   ]
 }
6 changes: 6 additions & 0 deletions cpp/include/cudf/copying.hpp
@@ -228,6 +228,9 @@ std::unique_ptr<column> empty_like(scalar const& input);
  *
  * Supports only fixed-width types.
  *
+ * If the `mask_alloc` allocates a validity mask that mask is also uninitialized
+ * and the validity bits and the null count should be set by the caller.
+ *
  * @param[in] input Immutable view of input column to emulate
  * @param[in] mask_alloc Optional, Policy for allocating null mask. Defaults to RETAIN
  * @param[in] mr Device memory resource used to allocate the returned column's device memory
@@ -244,6 +247,9 @@ std::unique_ptr<column> allocate_like(
  *
  * Supports only fixed-width types.
  *
+ * If the `mask_alloc` allocates a validity mask that mask is also uninitialized
+ * and the validity bits and the null count should be set by the caller.
+ *
  * @param[in] input Immutable view of input column to emulate
  * @param[in] size The desired number of elements that the new column should have capacity for
  * @param[in] mask_alloc Optional, Policy for allocating null mask. Defaults to RETAIN
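
The doc comments added above tighten the allocate_like contract: when mask_alloc causes a validity mask to be allocated (for example mask_allocation_policy::ALWAYS), its bits and the null count are undefined until the caller sets them. A minimal caller-side sketch of that contract; the helper name is illustrative and not part of this change:

#include <cudf/copying.hpp>
#include <cudf/null_mask.hpp>

#include <memory>

// Illustrative helper: allocate a column shaped like `input`, then initialize the
// uninitialized validity mask and null count, as the new doc comment requires.
std::unique_ptr<cudf::column> allocate_all_valid_like(cudf::column_view const& input)
{
  auto result = cudf::allocate_like(input, cudf::mask_allocation_policy::ALWAYS);
  auto view   = result->mutable_view();
  cudf::set_null_mask(view.null_mask(), 0, view.size(), true);  // mark every row valid
  result->set_null_count(0);                                    // keep the count consistent
  return result;
}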
16 changes: 15 additions & 1 deletion cpp/include/cudf/detail/contiguous_split.hpp
@@ -67,7 +67,7 @@ class metadata_builder {
   * @brief Destructor that will be implemented as default, required because metadata_builder_impl
   * is incomplete at this stage.
   */
-  ~metadata_builder() = default;
+  ~metadata_builder();

  /**
   * @brief Add a column to this metadata builder.
@@ -105,9 +105,23 @@ class metadata_builder {
   */
  std::vector<uint8_t> build() const;

+  /**
+   * @brief Clear the internal buffer containing all added metadata.
+   */
+  void clear();
+
 private:
  std::unique_ptr<metadata_builder_impl> impl;
};

+/**
+ * @copydoc pack_metadata
+ * @param builder The reusable builder object to create packed column metadata.
+ */
+std::vector<uint8_t> pack_metadata(table_view const& table,
+                                   uint8_t const* contiguous_buffer,
+                                   size_t buffer_size,
+                                   metadata_builder& builder);
+
 }  // namespace detail
 }  // namespace cudf
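
The clear() method and the pack_metadata overload added above let one builder serve many pack calls. A hedged sketch of that reuse pattern; how the builder is constructed is not shown in this diff and is left to the caller, and clear() is called explicitly on the assumption that the overload does not reset the builder itself:

#include <cudf/detail/contiguous_split.hpp>
#include <cudf/table/table_view.hpp>

#include <cstdint>
#include <vector>

// Illustrative only: pack metadata for several tables laid out in one contiguous
// buffer, reusing a caller-provided builder across iterations.
std::vector<std::vector<uint8_t>> pack_all_metadata(
  std::vector<cudf::table_view> const& tables,
  uint8_t const* contiguous_buffer,
  size_t buffer_size,
  cudf::detail::metadata_builder& builder)
{
  std::vector<std::vector<uint8_t>> metadata;
  for (auto const& t : tables) {
    metadata.push_back(cudf::detail::pack_metadata(t, contiguous_buffer, buffer_size, builder));
    builder.clear();  // drop the previously added column metadata before reuse
  }
  return metadata;
}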
2 changes: 2 additions & 0 deletions cpp/include/cudf/null_mask.hpp
@@ -36,6 +36,8 @@ namespace cudf {
  * @brief Returns the null count for a null mask of the specified `state`
  * representing `size` elements.
  *
+ * @throw std::invalid_argument if state is UNINITIALIZED
+ *
  * @param state The state of the null mask
  * @param size The number of elements represented by the mask
  * @return The count of null elements
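
The declaration that follows this comment in null_mask.hpp is state_null_count; a small sketch of the documented behavior, assuming the usual mask_state enum values:

#include <cudf/null_mask.hpp>
#include <cudf/types.hpp>

void state_null_count_example()
{
  auto const n_null  = cudf::state_null_count(cudf::mask_state::ALL_NULL, 100);   // 100
  auto const n_valid = cudf::state_null_count(cudf::mask_state::ALL_VALID, 100);  // 0
  // Per the @throw added above, UNINITIALIZED is no longer a valid input:
  // cudf::state_null_count(cudf::mask_state::UNINITIALIZED, 100);  // throws std::invalid_argument
}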
8 changes: 5 additions & 3 deletions cpp/include/cudf_test/iterator_utilities.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -121,15 +121,17 @@ template <typename Iter>
  * The returned iterator yields `false` (to mark `null`) at the indices corresponding to the
  * pointers having `nullptr` values and `true` for the remaining indices.
  *
+ * @note The input vector is referenced by the transform iterator, so the
+ * lifespan must be just as long as the iterator.
+ *
  * @tparam T the data type
  * @param ptrs The data pointers for which the validity iterator is computed
  * @return auto Validity iterator
  */
 template <class T>
 [[maybe_unused]] static auto nulls_from_nullptrs(std::vector<T const*> const& ptrs)
 {
-  // The vector `indices` is copied into the lambda as it can be destroyed at the caller site.
-  return thrust::make_transform_iterator(ptrs.begin(), [ptrs](auto ptr) { return ptr != nullptr; });
+  return thrust::make_transform_iterator(ptrs.begin(), [](auto ptr) { return ptr != nullptr; });
 }

 }  // namespace iterators
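
A minimal usage sketch for the @note added above: the pointer vector must outlive the returned iterator and anything built from it. This is test-only code and the names are illustrative:

#include <cudf_test/column_wrapper.hpp>
#include <cudf_test/iterator_utilities.hpp>

#include <vector>

void nulls_from_nullptrs_example()
{
  int a = 1, b = 3;
  std::vector<int const*> ptrs{&a, nullptr, &b};  // must stay alive while `valids` is used
  auto valids = cudf::test::iterators::nulls_from_nullptrs(ptrs);
  // Row 1 is marked null because its pointer is nullptr.
  cudf::test::fixed_width_column_wrapper<int32_t> col({1, 2, 3}, valids);
}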
14 changes: 8 additions & 6 deletions cpp/include/cudf_test/type_lists.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -84,11 +84,13 @@ std::enable_if_t<cudf::is_fixed_width<TypeParam>() && !cudf::is_timestamp_t<TypeParam>,
                  thrust::host_vector<TypeParam>>
 make_type_param_vector(std::initializer_list<T> const& init_list)
 {
-  thrust::host_vector<TypeParam> vec(init_list.size());
-  std::transform(std::cbegin(init_list), std::cend(init_list), std::begin(vec), [](auto const& e) {
-    if constexpr (std::is_unsigned_v<TypeParam>) { return static_cast<TypeParam>(std::abs(e)); }
-    return static_cast<TypeParam>(e);
-  });
+  std::vector<T> input{init_list};
+  std::vector<TypeParam> vec(init_list.size());
+  std::transform(
+    std::cbegin(input), std::cend(input), std::begin(vec), [](auto const& e) -> TypeParam {
+      if constexpr (std::is_unsigned_v<TypeParam>) { return static_cast<TypeParam>(std::abs(e)); }
+      return static_cast<TypeParam>(e);
+    });
   return vec;
 }
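
A short usage sketch of the utility rewritten above; with an unsigned TypeParam, negative initializers are mapped through std::abs before the cast:

#include <cudf_test/type_lists.hpp>

#include <cstdint>

void make_type_param_vector_example()
{
  // T is deduced as int; TypeParam is uint32_t, so {-1, 2, -3} becomes {1, 2, 3}.
  auto const vec = cudf::test::make_type_param_vector<uint32_t>({-1, 2, -3});
}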
(Diff truncated: the remaining changed files are not shown.)
