Skip to content

Commit

Permalink
Merge branch 'branch-0.19' of https://github.com/rapidsai/cudf into o…
Browse files Browse the repository at this point in the history
…rc_empty_dataframe
  • Loading branch information
rgsl888prabhu committed Mar 22, 2021
2 parents 7afc0f0 + 5d7767e commit 28e8eb2
Show file tree
Hide file tree
Showing 100 changed files with 4,773 additions and 1,862 deletions.
14 changes: 13 additions & 1 deletion cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,8 @@ include(cmake/thirdparty/CUDF_GetLibcudacxx.cmake)
include(cmake/thirdparty/CUDF_GetGTest.cmake)
# Stringify libcudf and libcudacxx headers used in JIT operations
include(cmake/Modules/StringifyJITHeaders.cmake)
# find cuFile
include(cmake/Modules/FindcuFile.cmake)

###################################################################################################
# - library targets -------------------------------------------------------------------------------
Expand Down Expand Up @@ -244,6 +246,7 @@ add_library(cudf
src/io/statistics/column_stats.cu
src/io/utilities/data_sink.cpp
src/io/utilities/datasource.cpp
src/io/utilities/file_io_utilities.cpp
src/io/utilities/parsing_utils.cu
src/io/utilities/type_conversion.cpp
src/jit/cache.cpp
Expand Down Expand Up @@ -422,7 +425,8 @@ target_include_directories(cudf
"$<BUILD_INTERFACE:${CUDF_GENERATED_INCLUDE_DIR}/include>"
PRIVATE "$<BUILD_INTERFACE:${CUDF_SOURCE_DIR}/src>"
INTERFACE "$<INSTALL_INTERFACE:include>"
"$<INSTALL_INTERFACE:include/libcudf/libcudaxx>")
"$<INSTALL_INTERFACE:include/libcudf/libcudacxx>"
"$<INSTALL_INTERFACE:include/libcudf/Thrust>")

# Add Conda library paths if specified
if(CONDA_LINK_DIRS)
Expand Down Expand Up @@ -469,6 +473,11 @@ else()
target_link_libraries(cudf PUBLIC CUDA::nvrtc CUDA::cudart CUDA::cuda_driver)
endif()

# Link cudf against the cuFile interface target, but only when that target has
# been defined; when it is absent cudf is left untouched.
if(TARGET cuFile::cuFile_interface)
  target_link_libraries(cudf
    PRIVATE
      cuFile::cuFile_interface)
endif()

file(WRITE "${CUDF_BINARY_DIR}/fatbin.ld"
[=[
SECTIONS
Expand Down Expand Up @@ -570,6 +579,9 @@ install(DIRECTORY
${CUDF_GENERATED_INCLUDE_DIR}/include/libcudacxx
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/libcudf)

# Install the contents of the Thrust source directory under the libcudf include
# tree (<includedir>/libcudf/Thrust).
#
# Thrust_SOURCE_DIR must be non-empty: with an empty value the DIRECTORY
# argument below would collapse to "/" and the install step would attempt to
# copy the filesystem root. Fail at configure time instead.
if(NOT Thrust_SOURCE_DIR)
  message(FATAL_ERROR
    "Thrust_SOURCE_DIR is not set; cannot install the Thrust headers for libcudf")
endif()

# The trailing slash installs the directory's contents rather than the
# directory itself. Paths are quoted so they are always passed as one argument.
install(DIRECTORY "${Thrust_SOURCE_DIR}/"
        DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/libcudf/Thrust")

include(CMakePackageConfigHelpers)

configure_package_config_file(cmake/cudf-config.cmake.in "${CUDF_BINARY_DIR}/cmake/cudf-config.cmake"
Expand Down
3 changes: 3 additions & 0 deletions cpp/benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -180,13 +180,16 @@ ConfigureBench(SUBWORD_TOKENIZER_BENCH text/subword_benchmark.cpp)
# - strings benchmark -------------------------------------------------------------------
# Sources of the STRINGS_BENCH benchmark, one file per line.
ConfigureBench(
  STRINGS_BENCH
    string/case_benchmark.cpp
    string/combine_benchmark.cpp
    string/contains_benchmark.cpp
    string/convert_durations_benchmark.cpp
    string/convert_floats_benchmark.cpp
    string/copy_benchmark.cpp
    string/extract_benchmark.cpp
    string/factory_benchmark.cu
    string/filter_benchmark.cpp
    string/find_benchmark.cpp
    string/replace_benchmark.cpp
    string/split_benchmark.cpp
    string/substring_benchmark.cpp
    string/url_decode_benchmark.cpp
)
2 changes: 1 addition & 1 deletion cpp/benchmarks/fixture/benchmark_fixture.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -88,4 +88,4 @@ class benchmark : public ::benchmark::Fixture {
std::shared_ptr<rmm::mr::device_memory_resource> mr;
};

}; // namespace cudf
} // namespace cudf
72 changes: 72 additions & 0 deletions cpp/benchmarks/string/combine_benchmark.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "string_bench_args.hpp"

#include <benchmark/benchmark.h>
#include <benchmarks/common/generate_benchmark_input.hpp>
#include <benchmarks/fixture/benchmark_fixture.hpp>
#include <benchmarks/synchronization/synchronization.hpp>

#include <cudf/scalar/scalar.hpp>
#include <cudf/strings/combine.hpp>
#include <cudf/strings/strings_column_view.hpp>
#include <cudf_test/column_wrapper.hpp>

/**
 * @brief Fixture type for the strings combine benchmarks.
 *
 * Adds nothing of its own; it only gives these benchmarks a distinct fixture
 * name derived from cudf::benchmark.
 */
class StringCombine : public cudf::benchmark {};

/**
 * @brief Benchmarks cudf::strings::concatenate on a random two-column strings table.
 *
 * `state.range(0)` is used as the row count and `state.range(1)` is passed, together
 * with 0, as the NORMAL distribution parameters for the STRING type. Every benchmark
 * iteration calls concatenate on the whole table with a "+" separator.
 *
 * @param state Google Benchmark state supplying the arguments and receiving the results.
 */
static void BM_combine(benchmark::State& state)
{
  cudf::size_type const n_rows{static_cast<cudf::size_type>(state.range(0))};
  cudf::size_type const max_str_length{static_cast<cudf::size_type>(state.range(1))};
  data_profile table_profile;
  table_profile.set_distribution_params(
    cudf::type_id::STRING, distribution_id::NORMAL, 0, max_str_length);
  auto const table =
    create_random_table({cudf::type_id::STRING}, 2, row_count{n_rows}, table_profile);
  cudf::strings_column_view input1(table->view().column(0));
  cudf::strings_column_view input2(table->view().column(1));
  cudf::string_scalar separator("+");

  for (auto _ : state) {
    // One timer object per iteration, scoped to the loop body.
    cuda_event_timer raii(state, true, 0);
    cudf::strings::concatenate(table->view(), separator);
  }

  // Scale each column's byte count by the (wide) iteration count before adding them.
  // Adding the two chars_size() values first would perform the sum in cudf::size_type
  // arithmetic (32-bit in libcudf), which can overflow when both columns are large.
  auto const iterations = state.iterations();
  state.SetBytesProcessed(iterations * input1.chars_size() + iterations * input2.chars_size());
}

/**
 * @brief Supplies the argument ranges for the combine benchmark.
 *
 * Forwards fixed row-count and row-length bounds, with their multipliers, to
 * generate_string_bench_args.
 *
 * @param b Benchmark object that receives the generated arguments.
 */
static void generate_bench_args(benchmark::internal::Benchmark* b)
{
  // Row counts: 2^12 up to 2^24, multiplier 8.
  constexpr int rows_lo   = 1 << 12;
  constexpr int rows_hi   = 1 << 24;
  constexpr int rows_step = 8;
  // Row lengths: 2^4 up to 2^11, multiplier 4.
  constexpr int len_lo   = 1 << 4;
  constexpr int len_hi   = 1 << 11;
  constexpr int len_step = 4;

  generate_string_bench_args(b, rows_lo, rows_hi, rows_step, len_lo, len_hi, len_step);
}

// Defines a StringCombine fixture benchmark named `bench` whose body forwards to
// BM_combine, then registers it with the arguments from generate_bench_args,
// manual timing, and millisecond reporting.
#define STRINGS_BENCHMARK_DEFINE(bench)                       \
  BENCHMARK_DEFINE_F(StringCombine, bench)                    \
  (::benchmark::State & bench_state) { BM_combine(bench_state); } \
  BENCHMARK_REGISTER_F(StringCombine, bench)                  \
    ->Apply(generate_bench_args)                              \
    ->UseManualTime()                                         \
    ->Unit(benchmark::kMillisecond);

STRINGS_BENCHMARK_DEFINE(concat)
93 changes: 93 additions & 0 deletions cpp/benchmarks/string/factory_benchmark.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "string_bench_args.hpp"

#include <benchmark/benchmark.h>
#include <benchmarks/common/generate_benchmark_input.hpp>
#include <benchmarks/fixture/benchmark_fixture.hpp>
#include <benchmarks/synchronization/synchronization.hpp>

#include <cudf/strings/string_view.cuh>
#include <cudf/strings/strings_column_view.hpp>
#include <cudf_test/column_wrapper.hpp>

#include <rmm/device_uvector.hpp>

#include <thrust/execution_policy.h>
#include <thrust/transform.h>

#include <limits>

namespace {
/// Character pointer paired with a cudf::size_type (filled from size_bytes() below).
using string_pair = thrust::pair<char const*, cudf::size_type>;

/**
 * @brief Device functor turning a (string_view, bool) pair into a string_pair.
 *
 * When the bool is true the result holds the string_view's data pointer and byte
 * size; when it is false the result is {nullptr, 0}.
 * NOTE(review): the bool presumably flags row validity -- confirm against pair_begin.
 */
struct string_view_to_pair {
  __device__ string_pair operator()(thrust::pair<cudf::string_view, bool> const& entry)
  {
    if (!entry.second) { return string_pair{nullptr, 0}; }
    return string_pair{entry.first.data(), entry.first.size_bytes()};
  }
};
}  // namespace

/**
 * @brief Fixture type for the strings factory benchmarks.
 *
 * Adds nothing of its own; it only gives these benchmarks a distinct fixture
 * name derived from cudf::benchmark.
 */
class StringsFactory : public cudf::benchmark {};

/**
 * @brief Benchmarks cudf::make_strings_column on a device vector of string_pair entries.
 *
 * A one-column random strings table is generated from `state.range(0)` rows and the
 * distribution parameters (0, `state.range(1)`). Its rows are converted to string_pair
 * entries once, before timing; each benchmark iteration then calls make_strings_column
 * on those pairs.
 *
 * @param state Google Benchmark state supplying the arguments and receiving the results.
 */
static void BM_factory(benchmark::State& state)
{
  auto const n_rows         = static_cast<cudf::size_type>(state.range(0));
  auto const max_str_length = static_cast<cudf::size_type>(state.range(1));

  data_profile profile;
  profile.set_distribution_params(
    cudf::type_id::STRING, distribution_id::NORMAL, 0, max_str_length);
  auto const table = create_random_table({cudf::type_id::STRING}, 1, row_count{n_rows}, profile);

  // Build the (pointer, size) input for the factory from the generated column.
  auto d_column = cudf::column_device_view::create(table->view().column(0));
  rmm::device_vector<string_pair> pairs(d_column->size());
  auto const first = d_column->pair_begin<cudf::string_view, true>();
  auto const last  = d_column->pair_end<cudf::string_view, true>();
  thrust::transform(thrust::device, first, last, pairs.data(), string_view_to_pair{});

  for (auto _ : state) {
    // One timer object per iteration, scoped to the loop body.
    cuda_event_timer raii(state, true, 0);
    cudf::make_strings_column(pairs);
  }

  // Bytes processed are reported from the chars size of the source column.
  cudf::strings_column_view input(table->view().column(0));
  state.SetBytesProcessed(state.iterations() * input.chars_size());
}

/**
 * @brief Supplies the argument ranges for the factory benchmark.
 *
 * Forwards fixed row-count and row-length bounds, with their multipliers, to
 * generate_string_bench_args.
 *
 * @param b Benchmark object that receives the generated arguments.
 */
static void generate_bench_args(benchmark::internal::Benchmark* b)
{
  // Row counts: 2^12 up to 2^24, multiplier 8.
  constexpr int rows_lo   = 1 << 12;
  constexpr int rows_hi   = 1 << 24;
  constexpr int rows_step = 8;
  // Row lengths: 2^5 up to 2^13, multiplier 4.
  constexpr int len_lo   = 1 << 5;
  constexpr int len_hi   = 1 << 13;
  constexpr int len_step = 4;

  generate_string_bench_args(b, rows_lo, rows_hi, rows_step, len_lo, len_hi, len_step);
}

// Defines a StringsFactory fixture benchmark named `bench` whose body forwards to
// BM_factory, then registers it with the arguments from generate_bench_args,
// manual timing, and millisecond reporting.
#define STRINGS_BENCHMARK_DEFINE(bench)                       \
  BENCHMARK_DEFINE_F(StringsFactory, bench)                   \
  (::benchmark::State & bench_state) { BM_factory(bench_state); } \
  BENCHMARK_REGISTER_F(StringsFactory, bench)                 \
    ->Apply(generate_bench_args)                              \
    ->UseManualTime()                                         \
    ->Unit(benchmark::kMillisecond);

STRINGS_BENCHMARK_DEFINE(factory)
4 changes: 4 additions & 0 deletions cpp/benchmarks/string/string_bench_args.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,10 @@

#include <benchmark/benchmark.h>

#include <cudf/types.hpp>

#include <limits>

/**
* @brief Generate row count and row length argument ranges for a string benchmark.
*
Expand Down
93 changes: 93 additions & 0 deletions cpp/benchmarks/string/substring_benchmark.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "string_bench_args.hpp"

#include <benchmark/benchmark.h>
#include <benchmarks/common/generate_benchmark_input.hpp>
#include <benchmarks/fixture/benchmark_fixture.hpp>
#include <benchmarks/synchronization/synchronization.hpp>

#include <cudf/scalar/scalar.hpp>
#include <cudf/strings/strings_column_view.hpp>
#include <cudf/strings/substring.hpp>
#include <cudf_test/base_fixture.hpp>
#include <cudf_test/column_wrapper.hpp>

#include <limits>

#include <thrust/iterator/constant_iterator.h>

/**
 * @brief Fixture type for the strings substring benchmarks.
 *
 * Adds nothing of its own; it only gives these benchmarks a distinct fixture
 * name derived from cudf::benchmark.
 */
class StringSubstring : public cudf::benchmark {};

/// Selects which cudf::strings::slice_strings call BM_substring times.
enum substring_type {
  position,        ///< scalar start/stop positions
  multi_position,  ///< per-row start/stop columns
  delimiter,       ///< single delimiter string
  multi_delimiter  ///< per-row delimiter column
};

/**
 * @brief Benchmarks one cudf::strings::slice_strings call on a random strings column.
 *
 * A one-column random strings table is generated from `state.range(0)` rows and the
 * distribution parameters (0, `state.range(1)`). The per-row argument columns are
 * built once before timing, whichever variant is selected.
 *
 * @param state Google Benchmark state supplying the arguments and receiving the results.
 * @param rt    Selects which slice_strings call is executed in the benchmark loop.
 */
static void BM_substring(benchmark::State& state, substring_type rt)
{
  auto const n_rows         = static_cast<cudf::size_type>(state.range(0));
  auto const max_str_length = static_cast<cudf::size_type>(state.range(1));

  data_profile profile;
  profile.set_distribution_params(
    cudf::type_id::STRING, distribution_id::NORMAL, 0, max_str_length);
  auto const table = create_random_table({cudf::type_id::STRING}, 1, row_count{n_rows}, profile);
  cudf::strings_column_view input(table->view().column(0));

  // Constant per-row arguments: start 1, stop max_str_length / 2, delimiter " ".
  auto first_start = thrust::constant_iterator<cudf::size_type>(1);
  auto first_stop  = thrust::constant_iterator<cudf::size_type>(max_str_length / 2);
  auto first_delim = thrust::constant_iterator<std::string>(" ");
  cudf::test::fixed_width_column_wrapper<int32_t> starts(first_start, first_start + n_rows);
  cudf::test::fixed_width_column_wrapper<int32_t> stops(first_stop, first_stop + n_rows);
  cudf::test::strings_column_wrapper delimiters(first_delim, first_delim + n_rows);

  for (auto _ : state) {
    // One timer object per iteration, scoped to the loop body.
    cuda_event_timer raii(state, true, 0);
    switch (rt) {
      case position: {
        cudf::strings::slice_strings(input, 1, max_str_length / 2);
        break;
      }
      case multi_position: {
        cudf::strings::slice_strings(input, starts, stops);
        break;
      }
      case delimiter: {
        cudf::strings::slice_strings(input, std::string{" "}, 1);
        break;
      }
      case multi_delimiter: {
        cudf::strings::slice_strings(input, cudf::strings_column_view(delimiters), 1);
        break;
      }
    }
  }

  state.SetBytesProcessed(state.iterations() * input.chars_size());
}

/**
 * @brief Supplies the argument ranges for the substring benchmarks.
 *
 * Forwards fixed row-count and row-length bounds, with their multipliers, to
 * generate_string_bench_args.
 *
 * @param b Benchmark object that receives the generated arguments.
 */
static void generate_bench_args(benchmark::internal::Benchmark* b)
{
  // Row counts: 2^12 up to 2^24, multiplier 8.
  constexpr int rows_lo   = 1 << 12;
  constexpr int rows_hi   = 1 << 24;
  constexpr int rows_step = 8;
  // Row lengths: 2^5 up to 2^13, multiplier 4.
  constexpr int len_lo   = 1 << 5;
  constexpr int len_hi   = 1 << 13;
  constexpr int len_step = 4;

  generate_string_bench_args(b, rows_lo, rows_hi, rows_step, len_lo, len_hi, len_step);
}

// Defines a StringSubstring fixture benchmark named `bench` whose body forwards to
// BM_substring with the substring_type enumerator of the same name, then registers
// it with the arguments from generate_bench_args, manual timing, and millisecond
// reporting.
#define STRINGS_BENCHMARK_DEFINE(bench)                                                   \
  BENCHMARK_DEFINE_F(StringSubstring, bench)                                              \
  (::benchmark::State & bench_state) { BM_substring(bench_state, substring_type::bench); } \
  BENCHMARK_REGISTER_F(StringSubstring, bench)                                            \
    ->Apply(generate_bench_args)                                                          \
    ->UseManualTime()                                                                     \
    ->Unit(benchmark::kMillisecond);

// One benchmark per substring_type enumerator.
STRINGS_BENCHMARK_DEFINE(position)
STRINGS_BENCHMARK_DEFINE(multi_position)
STRINGS_BENCHMARK_DEFINE(delimiter)
STRINGS_BENCHMARK_DEFINE(multi_delimiter)
6 changes: 6 additions & 0 deletions cpp/cmake/Modules/FindcuFile.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,12 @@ find_package_handle_standard_args(cuFile
cuFile_VERSION
)

# Header-only cuFile target. It is created whenever the cuFile include directory
# is known and the target does not exist yet; the condition deliberately tests
# cuFile_INCLUDE_DIR, not cuFile_FOUND.
if (cuFile_INCLUDE_DIR AND NOT TARGET cuFile::cuFile_interface)
  add_library(cuFile::cuFile_interface INTERFACE IMPORTED)
  target_include_directories(cuFile::cuFile_interface
    INTERFACE "$<BUILD_INTERFACE:${cuFile_INCLUDE_DIR}>")
  target_compile_options(cuFile::cuFile_interface
    INTERFACE "${cuFile_COMPILE_OPTIONS}")
  # Targets linking this interface are compiled with CUFILE_FOUND defined.
  target_compile_definitions(cuFile::cuFile_interface
    INTERFACE CUFILE_FOUND)
endif ()

if (cuFile_FOUND AND NOT TARGET cuFile::cuFile)
add_library(cuFile::cuFile UNKNOWN IMPORTED)
Expand Down
14 changes: 12 additions & 2 deletions cpp/cmake/cudf-config.cmake.in
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,22 @@ find_dependency(CUDAToolkit)
find_dependency(Threads)
find_dependency(ZLIB)

# Don't look for a Boost CMake configuration file because it adds the
# `-DBOOST_ALL_NO_LIB` and `-DBOOST_FILESYSTEM_DYN_LINK` compile defs
set(Boost_NO_BOOST_CMAKE ON)
find_dependency(Boost @CUDF_MIN_VERSION_Boost@ COMPONENTS filesystem)

find_dependency(Arrow @CUDF_VERSION_Arrow@)

set(ArrowCUDA_DIR "${Arrow_DIR}")
find_dependency(ArrowCUDA @CUDF_VERSION_Arrow@)
find_dependency(Boost @CUDF_MIN_VERSION_Boost@)

find_dependency(rmm @CUDF_MIN_VERSION_rmm@)
find_dependency(gtest @CUDF_MIN_VERSION_gtest@)
find_dependency(GTest @CUDF_MIN_VERSION_GTest@)

set(Thrust_ROOT "${CMAKE_CURRENT_LIST_DIR}/../../../include/libcudf/Thrust")
find_dependency(Thrust @CUDF_MIN_VERSION_Thrust@)
thrust_create_target(cudf::Thrust FROM_OPTIONS)

list(POP_FRONT CMAKE_MODULE_PATH)

Expand Down
4 changes: 2 additions & 2 deletions cpp/cmake/thirdparty/CUDF_GetGTest.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,6 @@ function(find_and_configure_gtest VERSION)
endif()
endfunction()

set(CUDF_MIN_VERSION_gtest 1.10.0)
set(CUDF_MIN_VERSION_GTest 1.10.0)

find_and_configure_gtest(${CUDF_MIN_VERSION_gtest})
find_and_configure_gtest(${CUDF_MIN_VERSION_GTest})
1 change: 1 addition & 0 deletions cpp/cmake/thirdparty/CUDF_GetThrust.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ function(find_and_configure_thrust VERSION)

thrust_create_target(cudf::Thrust FROM_OPTIONS)
set(THRUST_LIBRARY "cudf::Thrust" PARENT_SCOPE)
set(Thrust_SOURCE_DIR "${Thrust_SOURCE_DIR}" PARENT_SCOPE)
endfunction()

set(CUDF_MIN_VERSION_Thrust 1.10.0)
Expand Down
Loading

0 comments on commit 28e8eb2

Please sign in to comment.