Skip to content

Commit

Permalink
Merge branch 'branch-22.06' of https://github.com/rapidsai/cudf into …
Browse files Browse the repository at this point in the history
…feature/diff-non-numeric
  • Loading branch information
Matt711 committed Apr 15, 2022
2 parents 1f729ad + 4e668f2 commit 3229a3a
Show file tree
Hide file tree
Showing 38 changed files with 400 additions and 199 deletions.
2 changes: 2 additions & 0 deletions cpp/benchmarks/io/csv/csv_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ void BM_csv_read_varying_input(benchmark::State& state)

auto mem_stats_logger = cudf::memory_stats_logger();
for (auto _ : state) {
try_drop_l3_cache();
cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0
cudf_io::read_csv(read_options);
}
Expand Down Expand Up @@ -98,6 +99,7 @@ void BM_csv_read_varying_options(benchmark::State& state)
cudf::size_type const chunk_row_cnt = view.num_rows() / num_chunks;
auto mem_stats_logger = cudf::memory_stats_logger();
for (auto _ : state) {
try_drop_l3_cache();
cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0
for (int32_t chunk = 0; chunk < num_chunks; ++chunk) {
// only read the header in the first chunk
Expand Down
28 changes: 28 additions & 0 deletions cpp/benchmarks/io/cuio_common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -141,3 +141,31 @@ std::vector<cudf::size_type> segments_in_chunk(int num_segments, int num_chunks,

return selected_segments;
}

// Runs `cmd` through popen() and returns everything the command wrote to stderr.
// The command's stdout is discarded. Fails via CUDF_EXPECTS if popen() returns null.
std::string exec_cmd(std::string_view cmd)
{
  // Wrap the command so that stderr and stdout trade places (using fd 3 as a scratch
  // descriptor); the pipe then carries only stderr, and the original stdout, now on fd 2
  // of the subshell, is sent to /dev/null.
  std::string shell_cmd{"( "};
  shell_cmd.append(cmd);
  shell_cmd.append(" 3>&2 2>&1 1>&3) 2>/dev/null");

  // pclose() is invoked automatically when `stream` goes out of scope
  std::unique_ptr<FILE, decltype(&pclose)> stream(popen(shell_cmd.c_str(), "r"), pclose);
  CUDF_EXPECTS(stream != nullptr, "popen() failed");

  // Accumulate the piped output in fixed-size pieces until fgets() reports end of stream
  std::string captured;
  std::array<char, 128> chunk;
  for (;;) {
    if (fgets(chunk.data(), chunk.size(), stream.get()) == nullptr) { break; }
    captured += chunk.data();
  }
  return captured;
}

// Runs `sysctl vm.drop_caches=3` when the CUDF_BENCHMARK_DROP_CACHE environment variable
// is set; otherwise does nothing. The plain command is tried first, then the `sudo`
// variant. Fails via CUDF_EXPECTS if every attempt writes something to stderr.
void try_drop_l3_cache()
{
  // The environment is consulted only once; later calls reuse the cached answer
  static bool drop_requested = std::getenv("CUDF_BENCHMARK_DROP_CACHE") != nullptr;
  if (drop_requested) {
    std::array candidate_cmds{"/sbin/sysctl vm.drop_caches=3",
                              "sudo /sbin/sysctl vm.drop_caches=3"};

    // A command counts as successful when it produces no stderr output;
    // stop at the first one that succeeds.
    bool dropped = false;
    for (auto const& candidate : candidate_cmds) {
      if (exec_cmd(candidate).empty()) {
        dropped = true;
        break;
      }
    }
    CUDF_EXPECTS(dropped, "Failed to execute the drop cache command");
  }
}
10 changes: 10 additions & 0 deletions cpp/benchmarks/io/cuio_common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -132,3 +132,13 @@ std::vector<std::string> select_column_names(std::vector<std::string> const& col
* The segments could be Parquet row groups or ORC stripes.
*/
std::vector<cudf::size_type> segments_in_chunk(int num_segments, int num_chunks, int chunk);

/**
* @brief Drops L3 cache if `CUDF_BENCHMARK_DROP_CACHE` environment variable is set.
*
* Has no effect if the environment variable is not set.
* May require sudo access to run successfully.
*
* @throw cudf::logic_error if the environment variable is set and the command fails
*/
void try_drop_l3_cache();
2 changes: 2 additions & 0 deletions cpp/benchmarks/io/orc/orc_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ void BM_orc_read_varying_input(benchmark::State& state)

auto mem_stats_logger = cudf::memory_stats_logger();
for (auto _ : state) {
try_drop_l3_cache();
cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0
cudf_io::read_orc(read_opts);
}
Expand Down Expand Up @@ -117,6 +118,7 @@ void BM_orc_read_varying_options(benchmark::State& state)
cudf::size_type const chunk_row_cnt = view.num_rows() / num_chunks;
auto mem_stats_logger = cudf::memory_stats_logger();
for (auto _ : state) {
try_drop_l3_cache();
cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0

cudf::size_type rows_read = 0;
Expand Down
2 changes: 1 addition & 1 deletion cpp/benchmarks/io/orc/orc_writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
* limitations under the License.
*/

#include "cudf/io/types.hpp"
#include <benchmark/benchmark.h>

#include <benchmarks/common/generate_input.hpp>
Expand All @@ -23,6 +22,7 @@
#include <benchmarks/synchronization/synchronization.hpp>

#include <cudf/io/orc.hpp>
#include <cudf/io/types.hpp>

// to enable, run cmake with -DBUILD_BENCHMARKS=ON

Expand Down
2 changes: 2 additions & 0 deletions cpp/benchmarks/io/parquet/parquet_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ void BM_parq_read_varying_input(benchmark::State& state)

auto mem_stats_logger = cudf::memory_stats_logger();
for (auto _ : state) {
try_drop_l3_cache();
cuda_event_timer const raii(state, true); // flush_l2_cache = true, stream = 0
cudf_io::read_parquet(read_opts);
}
Expand Down Expand Up @@ -117,6 +118,7 @@ void BM_parq_read_varying_options(benchmark::State& state)
cudf::size_type const chunk_row_cnt = view.num_rows() / num_chunks;
auto mem_stats_logger = cudf::memory_stats_logger();
for (auto _ : state) {
try_drop_l3_cache();
cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0

cudf::size_type rows_read = 0;
Expand Down
1 change: 1 addition & 0 deletions cpp/benchmarks/io/text/multibyte_split.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,7 @@ static void BM_multibyte_split(benchmark::State& state)

auto mem_stats_logger = cudf::memory_stats_logger();
for (auto _ : state) {
try_drop_l3_cache();
cuda_event_timer raii(state, true);
auto output = cudf::io::text::multibyte_split(*source, delim);
}
Expand Down
2 changes: 1 addition & 1 deletion cpp/benchmarks/sort/rank.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
* limitations under the License.
*/

#include "cudf/column/column_view.hpp"
#include <cudf/column/column_view.hpp>
#include <cudf/sorting.hpp>

#include <cudf_test/base_fixture.hpp>
Expand Down
15 changes: 7 additions & 8 deletions cpp/benchmarks/string/convert_durations.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,25 +13,24 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <benchmark/benchmark.h>

#include <cudf/column/column_view.hpp>
#include <cudf/strings/convert/convert_durations.hpp>
#include <cudf/types.hpp>
#include <cudf/wrappers/durations.hpp>

#include <cudf_test/base_fixture.hpp>
#include <cudf_test/column_utilities.hpp>
#include <cudf_test/column_wrapper.hpp>
#include <cudf_test/cudf_gtest.hpp>

#include <benchmark/benchmark.h>

#include <benchmarks/fixture/benchmark_fixture.hpp>
#include <benchmarks/synchronization/synchronization.hpp>

#include <algorithm>
#include <random>

#include "../fixture/benchmark_fixture.hpp"
#include "../synchronization/synchronization.hpp"
#include "cudf/column/column_view.hpp"
#include "cudf/wrappers/durations.hpp"

class DurationsToString : public cudf::benchmark {
};
template <class TypeParam>
Expand Down
4 changes: 3 additions & 1 deletion cpp/cmake/thirdparty/get_cucollections.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,14 @@ function(find_and_configure_cucollections)
cuco 0.0.1
GLOBAL_TARGETS cuco::cuco
BUILD_EXPORT_SET cudf-exports
INSTALL_EXPORT_SET cudf-exports
CPM_ARGS GITHUB_REPOSITORY NVIDIA/cuCollections
GIT_TAG fb58a38701f1c24ecfe07d8f1f208bbe80930da5
EXCLUDE_FROM_ALL ${BUILD_SHARED_LIBS}
OPTIONS "BUILD_TESTS OFF" "BUILD_BENCHMARKS OFF" "BUILD_EXAMPLES OFF"
)
if(NOT BUILD_SHARED_LIBS)
rapids_export_package(INSTALL cuco cudf-exports)
endif()

endfunction()

Expand Down
2 changes: 1 addition & 1 deletion cpp/include/cudf/detail/reduction_functions.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,9 @@
#pragma once

#include <cudf/column/column_view.hpp>
#include <cudf/lists/lists_column_view.hpp>
#include <cudf/scalar/scalar.hpp>

#include "cudf/lists/lists_column_view.hpp"
#include <rmm/cuda_stream_view.hpp>

namespace cudf {
Expand Down
2 changes: 1 addition & 1 deletion cpp/libcudf_kafka/src/kafka_callback.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "cudf_kafka/kafka_callback.hpp"
#include <cudf_kafka/kafka_callback.hpp>

#include <librdkafka/rdkafkacpp.h>

Expand Down
2 changes: 1 addition & 1 deletion cpp/libcudf_kafka/src/kafka_consumer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "cudf_kafka/kafka_consumer.hpp"
#include <cudf_kafka/kafka_consumer.hpp>

#include <librdkafka/rdkafkacpp.h>

Expand Down
2 changes: 1 addition & 1 deletion cpp/src/merge/merge.cu
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include <cudf/structs/structs_column_view.hpp>
#include <cudf/table/table.hpp>
#include <cudf/table/table_device_view.cuh>
#include <cudf/utilities/traits.hpp>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_uvector.hpp>
Expand All @@ -38,7 +39,6 @@
#include <thrust/transform.h>
#include <thrust/tuple.h>

#include "cudf/utilities/traits.hpp"
#include <queue>
#include <vector>

Expand Down
4 changes: 2 additions & 2 deletions cpp/src/structs/structs_column_view.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020, NVIDIA CORPORATION.
* Copyright (c) 2020-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -14,9 +14,9 @@
* limitations under the License.
*/

#include "cudf/utilities/error.hpp"
#include <cudf/column/column.hpp>
#include <cudf/structs/structs_column_view.hpp>
#include <cudf/utilities/error.hpp>

namespace cudf {

Expand Down
2 changes: 1 addition & 1 deletion cpp/tests/binaryop/binop-compiled-fixed_point-test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,13 @@
#include <cudf/scalar/scalar_factories.hpp>
#include <cudf/types.hpp>
#include <cudf/unary.hpp>
#include <cudf/utilities/error.hpp>
#include <cudf/utilities/type_dispatcher.hpp>

#include <cudf_test/column_utilities.hpp>
#include <cudf_test/column_wrapper.hpp>
#include <cudf_test/type_lists.hpp>

#include "cudf/utilities/error.hpp"
#include <tests/binaryop/assert-binops.h>
#include <tests/binaryop/binop-fixture.hpp>

Expand Down
2 changes: 1 addition & 1 deletion cpp/tests/hash_map/map_test.cu
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,12 @@

#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_uvector.hpp>
#include <rmm/exec_policy.hpp>

#include <thrust/logical.h>
#include <thrust/pair.h>
#include <thrust/tabulate.h>

#include "rmm/exec_policy.hpp"
#include <cstdlib>
#include <iostream>
#include <limits>
Expand Down
10 changes: 6 additions & 4 deletions cpp/tests/iterator/value_iterator_test_strings.cu
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,12 @@
* or implied. See the License for the specific language governing permissions and limitations under
* the License.
*/
#include "cudf/detail/utilities/vector_factories.hpp"
#include "rmm/cuda_stream_view.hpp"
#include "rmm/device_uvector.hpp"
#include <tests/iterator/iterator_tests.cuh>
#include "iterator_tests.cuh"

#include <cudf/detail/utilities/vector_factories.hpp>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_uvector.hpp>

#include <thrust/host_vector.h>
#include <thrust/iterator/counting_iterator.h>
Expand Down
10 changes: 5 additions & 5 deletions cpp/tests/partitioning/partition_test.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2021, NVIDIA CORPORATION.
* Copyright (c) 2020-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -13,16 +13,16 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <cudf/copying.hpp>
#include <cudf/partitioning.hpp>
#include <cudf/table/table.hpp>
#include <cudf_test/base_fixture.hpp>
#include <cudf_test/column_utilities.hpp>
#include <cudf_test/column_wrapper.hpp>
#include <cudf_test/table_utilities.hpp>
#include <cudf_test/type_lists.hpp>

#include "cudf/sorting.hpp"
#include <cudf/copying.hpp>
#include <cudf/partitioning.hpp>
#include <cudf/sorting.hpp>
#include <cudf/table/table.hpp>

template <typename T>
class PartitionTest : public cudf::test::BaseFixture {
Expand Down
3 changes: 3 additions & 0 deletions docs/cudf/source/api_docs/dataframe.rst
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,7 @@ Computations / descriptive stats
DataFrame.round
DataFrame.skew
DataFrame.sum
DataFrame.sum_of_squares
DataFrame.std
DataFrame.var
DataFrame.nunique
Expand Down Expand Up @@ -248,9 +249,11 @@ Serialization / IO / conversion
DataFrame.to_dlpack
DataFrame.to_parquet
DataFrame.to_csv
DataFrame.to_cupy
DataFrame.to_hdf
DataFrame.to_dict
DataFrame.to_json
DataFrame.to_numpy
DataFrame.to_pandas
DataFrame.to_feather
DataFrame.to_records
Expand Down
2 changes: 2 additions & 0 deletions docs/cudf/source/api_docs/index_objects.rst
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,9 @@ Conversion

Index.astype
Index.to_arrow
Index.to_cupy
Index.to_list
Index.to_numpy
Index.to_series
Index.to_frame
Index.to_pandas
Expand Down
2 changes: 2 additions & 0 deletions docs/cudf/source/api_docs/series.rst
Original file line number Diff line number Diff line change
Expand Up @@ -390,10 +390,12 @@ Serialization / IO / conversion
:toctree: api/

Series.to_arrow
Series.to_cupy
Series.to_dlpack
Series.to_frame
Series.to_hdf
Series.to_json
Series.to_numpy
Series.to_pandas
Series.to_string
Series.from_arrow
Expand Down
1 change: 0 additions & 1 deletion docs/cudf/source/api_docs/string_handling.rst
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,6 @@ strings and apply several methods to it. These can be accessed like
rsplit
startswith
strip
subword_tokenize
swapcase
title
token_count
Expand Down
Loading

0 comments on commit 3229a3a

Please sign in to comment.