diff --git a/conda/recipes/libcudf/meta.yaml b/conda/recipes/libcudf/meta.yaml index 4e20c979f6c..eae915c47fe 100644 --- a/conda/recipes/libcudf/meta.yaml +++ b/conda/recipes/libcudf/meta.yaml @@ -137,6 +137,7 @@ test: - test -f $PREFIX/include/cudf/io/orc_metadata.hpp - test -f $PREFIX/include/cudf/io/orc.hpp - test -f $PREFIX/include/cudf/io/parquet.hpp + - test -f $PREFIX/include/cudf/io/text/byte_range_info.hpp - test -f $PREFIX/include/cudf/io/text/data_chunk_source_factories.hpp - test -f $PREFIX/include/cudf/io/text/data_chunk_source.hpp - test -f $PREFIX/include/cudf/io/text/detail/multistate.hpp diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 2ffd62f1b53..825ea37c6ac 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -311,6 +311,7 @@ add_library( src/io/parquet/writer_impl.cu src/io/statistics/orc_column_statistics.cu src/io/statistics/parquet_column_statistics.cu + src/io/text/byte_range_info.cpp src/io/text/multibyte_split.cu src/io/utilities/column_buffer.cpp src/io/utilities/config_utils.cpp diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt index 99aeff0df93..054410c3265 100644 --- a/cpp/benchmarks/CMakeLists.txt +++ b/cpp/benchmarks/CMakeLists.txt @@ -276,7 +276,7 @@ ConfigureBench( # ################################################################################################## # * json benchmark ------------------------------------------------------------------- -ConfigureBench(JSON_BENCH string/json.cpp) +ConfigureBench(JSON_BENCH string/json.cu) # ################################################################################################## # * io benchmark --------------------------------------------------------------------- diff --git a/cpp/benchmarks/common/generate_input.cpp b/cpp/benchmarks/common/generate_input.cpp index d6564428a2e..6330beda54c 100644 --- a/cpp/benchmarks/common/generate_input.cpp +++ b/cpp/benchmarks/common/generate_input.cpp @@ -122,7 +122,7 @@ struct random_value_fn; * @brief Creates an random timestamp/duration value */ template -struct random_value_fn()>> { +struct random_value_fn()>> { std::function seconds_gen; std::function nanoseconds_gen; @@ -164,7 +164,7 @@ struct random_value_fn()>> { * @brief Creates an random fixed_point value. Not implemented yet. */ template -struct random_value_fn()>> { +struct random_value_fn()>> { using rep = typename T::rep; rep const lower_bound; rep const upper_bound; @@ -194,9 +194,7 @@ struct random_value_fn()>> * @brief Creates an random numeric value with the given distribution. */ template -struct random_value_fn< - T, - typename std::enable_if_t && cudf::is_numeric()>> { +struct random_value_fn && cudf::is_numeric()>> { T const lower_bound; T const upper_bound; distribution_fn dist; @@ -219,7 +217,7 @@ struct random_value_fn< * @brief Creates an boolean value with given probability of returning `true`. 
*/ template -struct random_value_fn>> { +struct random_value_fn>> { std::bernoulli_distribution b_dist; random_value_fn(distribution_params const& desc) : b_dist{desc.probability_true} {} @@ -260,7 +258,7 @@ struct stored_as { // Use `int8_t` for bools because that's how they're stored in columns template -struct stored_as>> { +struct stored_as>> { using type = int8_t; }; diff --git a/cpp/benchmarks/common/generate_input.hpp b/cpp/benchmarks/common/generate_input.hpp index 17bd650e722..43fee5c50a7 100644 --- a/cpp/benchmarks/common/generate_input.hpp +++ b/cpp/benchmarks/common/generate_input.hpp @@ -128,9 +128,7 @@ struct distribution_params; * @brief Numeric values are parameterized with a distribution type and bounds of the same type. */ template -struct distribution_params< - T, - typename std::enable_if_t && cudf::is_numeric()>> { +struct distribution_params && cudf::is_numeric()>> { distribution_id id; T lower_bound; T upper_bound; @@ -140,7 +138,7 @@ struct distribution_params< * @brief Booleans are parameterized with the probability of getting `true` value. */ template -struct distribution_params>> { +struct distribution_params>> { double probability_true; }; @@ -148,7 +146,7 @@ struct distribution_params> * @brief Timestamps and durations are parameterized with a distribution type and int64_t bounds. */ template -struct distribution_params()>> { +struct distribution_params()>> { distribution_id id; int64_t lower_bound; int64_t upper_bound; @@ -158,7 +156,7 @@ struct distribution_params()>> { * @brief Strings are parameterized by the distribution of their length, as an integral value. */ template -struct distribution_params>> { +struct distribution_params>> { distribution_params length_params; }; @@ -167,7 +165,7 @@ struct distribution_params -struct distribution_params>> { +struct distribution_params>> { cudf::type_id element_type; distribution_params length_params; cudf::size_type max_depth; @@ -175,7 +173,7 @@ struct distribution_params -struct distribution_params()>> { +struct distribution_params()>> { }; /** @@ -225,8 +223,7 @@ class data_profile { public: template && cuda::std::is_integral_v, T>* = - nullptr> + std::enable_if_t && cuda::std::is_integral_v, T>* = nullptr> distribution_params get_distribution_params() const { auto it = int_params.find(cudf::type_to_id()); @@ -239,7 +236,7 @@ class data_profile { } } - template , T>* = nullptr> + template , T>* = nullptr> distribution_params get_distribution_params() const { auto it = float_params.find(cudf::type_to_id()); @@ -258,7 +255,7 @@ class data_profile { return distribution_params{bool_probability}; } - template ()>* = nullptr> + template ()>* = nullptr> distribution_params get_distribution_params() const { auto it = int_params.find(cudf::type_to_id()); @@ -284,7 +281,7 @@ class data_profile { return list_dist_desc; } - template ()>* = nullptr> + template ()>* = nullptr> distribution_params get_distribution_params() const { using rep = typename T::rep; @@ -307,7 +304,7 @@ class data_profile { // discrete distributions (integers, strings, lists). Otherwise the call with have no effect. template , T>* = nullptr> + std::enable_if_t, T>* = nullptr> void set_distribution_params(Type_enum type_or_group, distribution_id dist, T lower_bound, @@ -331,7 +328,7 @@ class data_profile { // have continuous distributions (floating point types). Otherwise the call with have no effect. 
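Most of the churn in these benchmark headers (and across the rest of this patch) is a mechanical modernization: typename std::enable_if<...>::type and the redundant typename std::enable_if_t<...> spellings become the plain C++14 alias std::enable_if_t<...>. A minimal standalone sketch of the before/after spelling, using hypothetical function names rather than code from the patch:

#include <type_traits>

// Old spelling: name std::enable_if's ::type member, which needs a leading
// `typename` in dependent contexts.
template <typename T,
          typename std::enable_if<std::is_integral<T>::value, T>::type* = nullptr>
T square_old(T v)
{
  return v * v;
}

// New spelling used throughout the patch: std::enable_if_t is already a type,
// so the typename/::type boilerplate disappears.
template <typename T, std::enable_if_t<std::is_integral_v<T>, T>* = nullptr>
T square_new(T v)
{
  return v * v;
}

int main() { return square_old(3) + square_new(4) == 25 ? 0 : 1; }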
template , T>* = nullptr> + std::enable_if_t, T>* = nullptr> void set_distribution_params(Type_enum type_or_group, distribution_id dist, T lower_bound, diff --git a/cpp/benchmarks/common/random_distribution_factory.hpp b/cpp/benchmarks/common/random_distribution_factory.hpp index df2b6e0a754..f2f3833f15d 100644 --- a/cpp/benchmarks/common/random_distribution_factory.hpp +++ b/cpp/benchmarks/common/random_distribution_factory.hpp @@ -24,7 +24,7 @@ /** * @brief Generates a normal(binomial) distribution between zero and upper_bound. */ -template , T>* = nullptr> +template , T>* = nullptr> auto make_normal_dist(T upper_bound) { using uT = typename std::make_unsigned::type; @@ -42,7 +42,7 @@ auto make_normal_dist(T upper_bound) return std::normal_distribution(mean, stddev); } -template , T>* = nullptr> +template , T>* = nullptr> auto make_uniform_dist(T range_start, T range_end) { return std::uniform_int_distribution(range_start, range_end); @@ -62,7 +62,7 @@ double geometric_dist_p(T range_size) return p ? p : std::numeric_limits::epsilon(); } -template , T>* = nullptr> +template , T>* = nullptr> auto make_geometric_dist(T range_start, T range_end) { using uT = typename std::make_unsigned::type; @@ -82,7 +82,7 @@ auto make_geometric_dist(T range_start, T range_end) template using distribution_fn = std::function; -template , T>* = nullptr> +template , T>* = nullptr> distribution_fn make_distribution(distribution_id did, T lower_bound, T upper_bound) { switch (did) { diff --git a/cpp/benchmarks/string/json.cpp b/cpp/benchmarks/string/json.cpp deleted file mode 100644 index 1ade4d01e1e..00000000000 --- a/cpp/benchmarks/string/json.cpp +++ /dev/null @@ -1,140 +0,0 @@ -/* - * Copyright (c) 2021, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include -#include -#include - -#include -#include - -#include -#include - -class JsonPath : public cudf::benchmark { -}; - -float frand() { return static_cast(rand()) / static_cast(RAND_MAX); } - -int rand_range(int min, int max) { return min + static_cast(frand() * (max - min)); } - -std::vector Books{ - "{\n\"category\": \"reference\",\n\"author\": \"Nigel Rees\",\n\"title\": \"Sayings of the " - "Century\",\n\"price\": 8.95\n}", - "{\n\"category\": \"fiction\",\n\"author\": \"Evelyn Waugh\",\n\"title\": \"Sword of " - "Honour\",\n\"price\": 12.99\n}", - "{\n\"category\": \"fiction\",\n\"author\": \"Herman Melville\",\n\"title\": \"Moby " - "Dick\",\n\"isbn\": \"0-553-21311-3\",\n\"price\": 8.99\n}", - "{\n\"category\": \"fiction\",\n\"author\": \"J. R. R. 
Tolkien\",\n\"title\": \"The Lord of the " - "Rings\",\n\"isbn\": \"0-395-19395-8\",\n\"price\": 22.99\n}"}; -constexpr int Approx_book_size = 110; -std::vector Bicycles{ - "{\"color\": \"red\", \"price\": 9.95}", - "{\"color\": \"green\", \"price\": 29.95}", - "{\"color\": \"blue\", \"price\": 399.95}", - "{\"color\": \"yellow\", \"price\": 99.95}", - "{\"color\": \"mauve\", \"price\": 199.95}", -}; -constexpr int Approx_bicycle_size = 33; -std::string Misc{"\n\"expensive\": 10\n"}; -std::string generate_field(std::vector const& values, int num_values) -{ - std::string res; - for (int idx = 0; idx < num_values; idx++) { - if (idx > 0) { res += std::string(",\n"); } - int vindex = std::min(static_cast(floor(frand() * values.size())), - static_cast(values.size() - 1)); - res += values[vindex]; - } - return res; -} - -std::string build_row(int desired_bytes) -{ - // always have at least 2 books and 2 bikes - int num_books = 2; - int num_bicycles = 2; - int remaining_bytes = - desired_bytes - ((num_books * Approx_book_size) + (num_bicycles * Approx_bicycle_size)); - - // divide up the remainder between books and bikes - float book_pct = frand(); - float bicycle_pct = 1.0f - book_pct; - num_books += (remaining_bytes * book_pct) / Approx_book_size; - num_bicycles += (remaining_bytes * bicycle_pct) / Approx_bicycle_size; - - std::string books = "\"book\": [\n" + generate_field(Books, num_books) + "]\n"; - std::string bicycles = "\"bicycle\": [\n" + generate_field(Bicycles, num_bicycles) + "]\n"; - - std::string store = "\"store\": {\n"; - if (frand() <= 0.5f) { - store += books + std::string(",\n") + bicycles; - } else { - store += bicycles + std::string(",\n") + books; - } - store += std::string("}\n"); - - std::string row = std::string("{\n"); - if (frand() <= 0.5f) { - row += store + std::string(",\n") + Misc; - } else { - row += Misc + std::string(",\n") + store; - } - row += std::string("}\n"); - return row; -} - -template -static void BM_case(benchmark::State& state, QueryArg&&... query_arg) -{ - srand(5236); - auto iter = thrust::make_transform_iterator( - thrust::make_counting_iterator(0), - [desired_bytes = state.range(1)](int index) { return build_row(desired_bytes); }); - int num_rows = state.range(0); - cudf::test::strings_column_wrapper input(iter, iter + num_rows); - cudf::strings_column_view scv(input); - size_t num_chars = scv.chars().size(); - - std::string json_path(query_arg...); - - for (auto _ : state) { - cuda_event_timer raii(state, true); - auto result = cudf::strings::get_json_object(scv, json_path); - cudaStreamSynchronize(0); - } - - // this isn't strictly 100% accurate. a given query isn't necessarily - // going to visit every single incoming character. but in spirit it does. 
- state.SetBytesProcessed(state.iterations() * num_chars); -} - -#define JSON_BENCHMARK_DEFINE(name, query) \ - BENCHMARK_CAPTURE(BM_case, name, query) \ - ->ArgsProduct({{100, 1000, 100000, 400000}, {300, 600, 4096}}) \ - ->UseManualTime() \ - ->Unit(benchmark::kMillisecond); - -JSON_BENCHMARK_DEFINE(query0, "$"); -JSON_BENCHMARK_DEFINE(query1, "$.store"); -JSON_BENCHMARK_DEFINE(query2, "$.store.book"); -JSON_BENCHMARK_DEFINE(query3, "$.store.*"); -JSON_BENCHMARK_DEFINE(query4, "$.store.book[*]"); -JSON_BENCHMARK_DEFINE(query5, "$.store.book[*].category"); -JSON_BENCHMARK_DEFINE(query6, "$.store['bicycle']"); -JSON_BENCHMARK_DEFINE(query7, "$.store.book[*]['isbn']"); -JSON_BENCHMARK_DEFINE(query8, "$.store.bicycle[1]"); diff --git a/cpp/benchmarks/string/json.cu b/cpp/benchmarks/string/json.cu new file mode 100644 index 00000000000..69c42f97d7f --- /dev/null +++ b/cpp/benchmarks/string/json.cu @@ -0,0 +1,227 @@ +/* + * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include + +#include + +class JsonPath : public cudf::benchmark { +}; + +const std::vector Books{ + R"json({ +"category": "reference", +"author": "Nigel Rees", +"title": "Sayings of the Century", +"price": 8.95 +})json", + R"json({ +"category": "fiction", +"author": "Evelyn Waugh", +"title": "Sword of Honour", +"price": 12.99 +})json", + R"json({ +"category": "fiction", +"author": "Herman Melville", +"title": "Moby Dick", +"isbn": "0-553-21311-3", +"price": 8.99 +})json", + R"json({ +"category": "fiction", +"author": "J. R. R. 
Tolkien", +"title": "The Lord of the Rings", +"isbn": "0-395-19395-8", +"price": 22.99 +})json"}; +constexpr int Approx_book_size = 110; +const std::vector Bicycles{ + R"json({"color": "red", "price": 9.95})json", + R"json({"color": "green", "price": 29.95})json", + R"json({"color": "blue", "price": 399.95})json", + R"json({"color": "yellow", "price": 99.95})json", + R"json({"color": "mauve", "price": 199.95})json", +}; +constexpr int Approx_bicycle_size = 33; +std::string Misc{"\n\"expensive\": 10\n"}; + +struct json_benchmark_row_builder { + int const desired_bytes; + cudf::size_type const num_rows; + cudf::column_device_view const d_books_bicycles[2]; // Books, Bicycles strings + cudf::column_device_view const d_book_pct; // Book percentage + cudf::column_device_view const d_misc_order; // Misc-Store order + cudf::column_device_view const d_store_order; // Books-Bicycles order + int32_t* d_offsets{}; + char* d_chars{}; + thrust::minstd_rand rng{5236}; + thrust::uniform_int_distribution dist{}; + + // internal data structure for {bytes, out_ptr} with operator+= + struct bytes_and_ptr { + cudf::size_type bytes; + char* ptr; + __device__ bytes_and_ptr& operator+=(cudf::string_view const& str_append) + { + bytes += str_append.size_bytes(); + if (ptr) { ptr = cudf::strings::detail::copy_string(ptr, str_append); } + return *this; + } + }; + + __device__ inline void copy_items(int this_idx, + cudf::size_type num_items, + bytes_and_ptr& output_str) + { + using param_type = thrust::uniform_int_distribution::param_type; + dist.param(param_type{0, d_books_bicycles[this_idx].size() - 1}); + cudf::string_view comma(",\n", 2); + for (int i = 0; i < num_items; i++) { + if (i > 0) { output_str += comma; } + int idx = dist(rng); + auto item = d_books_bicycles[this_idx].element(idx); + output_str += item; + } + } + + __device__ void operator()(cudf::size_type idx) + { + int num_books = 2; + int num_bicycles = 2; + int remaining_bytes = max( + 0, desired_bytes - ((num_books * Approx_book_size) + (num_bicycles * Approx_bicycle_size))); + + // divide up the remainder between books and bikes + auto book_pct = d_book_pct.element(idx); + // {Misc, store} OR {store, Misc} + // store: {books, bicycles} OR store: {bicycles, books} + float bicycle_pct = 1.0f - book_pct; + num_books += (remaining_bytes * book_pct) / Approx_book_size; + num_bicycles += (remaining_bytes * bicycle_pct) / Approx_bicycle_size; + + char* out_ptr = d_chars ? d_chars + d_offsets[idx] : nullptr; + bytes_and_ptr output_str{0, out_ptr}; + // + cudf::string_view comma(",\n", 2); + cudf::string_view brace1("{\n", 2); + cudf::string_view store_member_start[2]{{"\"book\": [\n", 10}, {"\"bicycle\": [\n", 13}}; + cudf::string_view store("\"store\": {\n", 11); + cudf::string_view Misc{"\"expensive\": 10", 15}; + cudf::string_view brace2("\n}", 2); + cudf::string_view square2{"\n]", 2}; + + output_str += brace1; + if (d_misc_order.element(idx)) { // Misc. first. + output_str += Misc; + output_str += comma; + } + output_str += store; + for (int store_order = 0; store_order < 2; store_order++) { + if (store_order > 0) { output_str += comma; } + int this_idx = (d_store_order.element(idx) == store_order); + auto& mem_start = store_member_start[this_idx]; + output_str += mem_start; + copy_items(this_idx, this_idx == 0 ? num_books : num_bicycles, output_str); + output_str += square2; + } + output_str += brace2; + if (!d_misc_order.element(idx)) { // Misc, if not first. 
+ output_str += comma; + output_str += Misc; + } + output_str += brace2; + if (!output_str.ptr) d_offsets[idx] = output_str.bytes; + } +}; + +auto build_json_string_column(int desired_bytes, int num_rows) +{ + data_profile profile; + profile.set_cardinality(0); + profile.set_null_frequency(-0.1); + profile.set_distribution_params( + cudf::type_id::FLOAT32, distribution_id::UNIFORM, 0.0, 1.0); + auto float_2bool_columns = + create_random_table({cudf::type_id::FLOAT32, cudf::type_id::BOOL8, cudf::type_id::BOOL8}, + row_count{num_rows}, + profile); + + cudf::test::strings_column_wrapper books(Books.begin(), Books.end()); + cudf::test::strings_column_wrapper bicycles(Bicycles.begin(), Bicycles.end()); + auto d_books = cudf::column_device_view::create(books); + auto d_bicycles = cudf::column_device_view::create(bicycles); + auto d_book_pct = cudf::column_device_view::create(float_2bool_columns->get_column(0)); + auto d_misc_order = cudf::column_device_view::create(float_2bool_columns->get_column(1)); + auto d_store_order = cudf::column_device_view::create(float_2bool_columns->get_column(2)); + json_benchmark_row_builder jb{ + desired_bytes, num_rows, {*d_books, *d_bicycles}, *d_book_pct, *d_misc_order, *d_store_order}; + auto children = cudf::strings::detail::make_strings_children(jb, num_rows); + return cudf::make_strings_column( + num_rows, std::move(children.first), std::move(children.second), 0, {}); +} + +void BM_case(benchmark::State& state, std::string query_arg) +{ + srand(5236); + int num_rows = state.range(0); + int desired_bytes = state.range(1); + auto input = build_json_string_column(desired_bytes, num_rows); + cudf::strings_column_view scv(input->view()); + size_t num_chars = scv.chars().size(); + + std::string json_path(query_arg); + + for (auto _ : state) { + cuda_event_timer raii(state, true); + auto result = cudf::strings::get_json_object(scv, json_path); + cudaStreamSynchronize(0); + } + + // this isn't strictly 100% accurate. a given query isn't necessarily + // going to visit every single incoming character. but in spirit it does. 
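build_json_string_column above relies on the standard two-pass strings-building pattern behind cudf::strings::detail::make_strings_children: the row builder runs once with a null character buffer so it only records per-row byte counts, the counts are scanned into offsets, and it runs again to write the characters. A host-only sketch of that pattern under simplified assumptions (no cudf types; build_row is a stand-in for the device functor):

#include <cstring>
#include <numeric>
#include <string>
#include <vector>

// Stand-in for the row-building functor: with a null chars pointer it only
// reports the row's byte count, otherwise it writes the row at `offset`.
int build_row(int row, char* chars, int offset)
{
  std::string const s = "row-" + std::to_string(row) + "\n";
  if (chars != nullptr) { std::memcpy(chars + offset, s.data(), s.size()); }
  return static_cast<int>(s.size());
}

int main()
{
  int const num_rows = 4;

  // pass 1: sizes only, stored shifted by one so offsets[0] stays 0
  std::vector<int> offsets(num_rows + 1, 0);
  for (int i = 0; i < num_rows; ++i) { offsets[i + 1] = build_row(i, nullptr, 0); }

  // inclusive scan of the shifted sizes yields the usual offsets array
  std::partial_sum(offsets.begin(), offsets.end(), offsets.begin());

  // pass 2: write all characters into one contiguous buffer
  std::vector<char> chars(offsets.back());
  for (int i = 0; i < num_rows; ++i) { build_row(i, chars.data(), offsets[i]); }

  return static_cast<int>(chars.size()) == offsets.back() ? 0 : 1;
}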
+ state.SetBytesProcessed(state.iterations() * num_chars); +} + +#define JSON_BENCHMARK_DEFINE(name, query) \ + BENCHMARK_DEFINE_F(JsonPath, name)(::benchmark::State & state) { BM_case(state, query); } \ + BENCHMARK_REGISTER_F(JsonPath, name) \ + ->ArgsProduct({{100, 1000, 100000, 400000}, {300, 600, 4096}}) \ + ->UseManualTime() \ + ->Unit(benchmark::kMillisecond); + +JSON_BENCHMARK_DEFINE(query0, "$"); +JSON_BENCHMARK_DEFINE(query1, "$.store"); +JSON_BENCHMARK_DEFINE(query2, "$.store.book"); +JSON_BENCHMARK_DEFINE(query3, "$.store.*"); +JSON_BENCHMARK_DEFINE(query4, "$.store.book[*]"); +JSON_BENCHMARK_DEFINE(query5, "$.store.book[*].category"); +JSON_BENCHMARK_DEFINE(query6, "$.store['bicycle']"); +JSON_BENCHMARK_DEFINE(query7, "$.store.book[*]['isbn']"); +JSON_BENCHMARK_DEFINE(query8, "$.store.bicycle[1]"); diff --git a/cpp/benchmarks/type_dispatcher/type_dispatcher.cu b/cpp/benchmarks/type_dispatcher/type_dispatcher.cu index 48b31e5dae7..3be599e8c41 100644 --- a/cpp/benchmarks/type_dispatcher/type_dispatcher.cu +++ b/cpp/benchmarks/type_dispatcher/type_dispatcher.cu @@ -42,7 +42,7 @@ struct Functor { }; template -struct Functor>> { +struct Functor>> { static __device__ Float f(Float x) { if (ft == BANDWIDTH_BOUND) { diff --git a/cpp/include/cudf/detail/calendrical_month_sequence.cuh b/cpp/include/cudf/detail/calendrical_month_sequence.cuh index 00742db7982..321cc3d19ef 100644 --- a/cpp/include/cudf/detail/calendrical_month_sequence.cuh +++ b/cpp/include/cudf/detail/calendrical_month_sequence.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -30,12 +30,12 @@ namespace cudf { namespace detail { struct calendrical_month_sequence_functor { template - typename std::enable_if_t::value, std::unique_ptr> - operator()(size_type n, - scalar const& input, - size_type months, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) + std::enable_if_t::value, std::unique_ptr> operator()( + size_type n, + scalar const& input, + size_type months, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) { // Return empty column if n = 0 if (n == 0) return cudf::make_empty_column(input.type()); @@ -59,8 +59,8 @@ struct calendrical_month_sequence_functor { } template - typename std::enable_if_t::value, std::unique_ptr> - operator()(Args&&...) + std::enable_if_t::value, std::unique_ptr> operator()( + Args&&...) { CUDF_FAIL("Cannot make a date_range of a non-datetime type"); } diff --git a/cpp/include/cudf/detail/reduction.cuh b/cpp/include/cudf/detail/reduction.cuh index 76825285745..e176529ed6d 100644 --- a/cpp/include/cudf/detail/reduction.cuh +++ b/cpp/include/cudf/detail/reduction.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
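The JSON_BENCHMARK_DEFINE macro in the new json.cu above switches from BENCHMARK_CAPTURE to a registered fixture (BENCHMARK_DEFINE_F / BENCHMARK_REGISTER_F) while keeping the same ArgsProduct sweep over row counts and row sizes. A minimal standalone Google Benchmark sketch of that registration pattern, with a hypothetical fixture and a trivial workload in place of get_json_object:

#include <benchmark/benchmark.h>

#include <string>
#include <vector>

class StringFixture : public ::benchmark::Fixture {
};

static void run_case(benchmark::State& state, std::string const& query)
{
  auto const num_rows = state.range(0);
  auto const row_size = state.range(1);
  for (auto _ : state) {
    // stand-in workload: build num_rows strings of roughly row_size bytes
    std::vector<std::string> rows(num_rows, std::string(row_size, 'x'));
    benchmark::DoNotOptimize(rows);
    benchmark::DoNotOptimize(query);
  }
  state.SetBytesProcessed(state.iterations() * num_rows * row_size);
}

#define SKETCH_BENCHMARK_DEFINE(name, query)                          \
  BENCHMARK_DEFINE_F(StringFixture, name)(::benchmark::State & state) \
  {                                                                   \
    run_case(state, query);                                           \
  }                                                                   \
  BENCHMARK_REGISTER_F(StringFixture, name)                           \
    ->ArgsProduct({{100, 1000}, {300, 600}})                          \
    ->Unit(benchmark::kMillisecond);

SKETCH_BENCHMARK_DEFINE(query0, "$");
SKETCH_BENCHMARK_DEFINE(query1, "$.store");

BENCHMARK_MAIN();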
@@ -49,8 +49,8 @@ namespace detail { template ::type, - typename std::enable_if_t() && - not cudf::is_fixed_point()>* = nullptr> + std::enable_if_t() && + not cudf::is_fixed_point()>* = nullptr> std::unique_ptr reduce(InputIterator d_in, cudf::size_type num_items, op::simple_op sop, @@ -92,7 +92,7 @@ std::unique_ptr reduce(InputIterator d_in, template ::type, - typename std::enable_if_t()>* = nullptr> + std::enable_if_t()>* = nullptr> std::unique_ptr reduce(InputIterator d_in, cudf::size_type num_items, op::simple_op sop, @@ -109,7 +109,7 @@ std::unique_ptr reduce(InputIterator d_in, template ::type, - typename std::enable_if_t>* = nullptr> + std::enable_if_t>* = nullptr> std::unique_ptr reduce(InputIterator d_in, cudf::size_type num_items, op::simple_op sop, diff --git a/cpp/include/cudf/detail/utilities/device_atomics.cuh b/cpp/include/cudf/detail/utilities/device_atomics.cuh index 221e90a9816..f985135064f 100644 --- a/cpp/include/cudf/detail/utilities/device_atomics.cuh +++ b/cpp/include/cudf/detail/utilities/device_atomics.cuh @@ -426,7 +426,7 @@ struct typesAtomicCASImpl { * @returns The old value at `address` */ template -typename std::enable_if_t(), T> __forceinline__ __device__ +std::enable_if_t(), T> __forceinline__ __device__ genericAtomicOperation(T* address, T const& update_value, BinaryOp op) { auto fun = cudf::detail::genericAtomicOperationImpl{}; @@ -435,7 +435,7 @@ genericAtomicOperation(T* address, T const& update_value, BinaryOp op) // specialization for cudf::detail::timestamp types template -typename std::enable_if_t(), T> __forceinline__ __device__ +std::enable_if_t(), T> __forceinline__ __device__ genericAtomicOperation(T* address, T const& update_value, BinaryOp op) { using R = typename T::rep; @@ -448,7 +448,7 @@ genericAtomicOperation(T* address, T const& update_value, BinaryOp op) // specialization for cudf::detail::duration types template -typename std::enable_if_t(), T> __forceinline__ __device__ +std::enable_if_t(), T> __forceinline__ __device__ genericAtomicOperation(T* address, T const& update_value, BinaryOp op) { using R = typename T::rep; @@ -616,7 +616,7 @@ __forceinline__ __device__ T atomicCAS(T* address, T compare, T val) * * @returns The old value at `address` */ -template , T>* = nullptr> +template , T>* = nullptr> __forceinline__ __device__ T atomicAnd(T* address, T val) { return cudf::genericAtomicOperation(address, val, cudf::DeviceAnd{}); @@ -637,7 +637,7 @@ __forceinline__ __device__ T atomicAnd(T* address, T val) * * @returns The old value at `address` */ -template , T>* = nullptr> +template , T>* = nullptr> __forceinline__ __device__ T atomicOr(T* address, T val) { return cudf::genericAtomicOperation(address, val, cudf::DeviceOr{}); @@ -658,7 +658,7 @@ __forceinline__ __device__ T atomicOr(T* address, T val) * * @returns The old value at `address` */ -template , T>* = nullptr> +template , T>* = nullptr> __forceinline__ __device__ T atomicXor(T* address, T val) { return cudf::genericAtomicOperation(address, val, cudf::DeviceXor{}); diff --git a/cpp/include/cudf/detail/utilities/device_operators.cuh b/cpp/include/cudf/detail/utilities/device_operators.cuh index 9423cb6b998..87fef5bc187 100644 --- a/cpp/include/cudf/detail/utilities/device_operators.cuh +++ b/cpp/include/cudf/detail/utilities/device_operators.cuh @@ -61,27 +61,26 @@ CUDF_HOST_DEVICE inline auto max(LHS const& lhs, RHS const& rhs) * @brief Binary `sum` operator */ struct DeviceSum { - template ()>* = nullptr> + template ()>* = nullptr> CUDF_HOST_DEVICE inline auto operator()(const 
T& lhs, const T& rhs) -> decltype(lhs + rhs) { return lhs + rhs; } - template ()>* = nullptr> + template ()>* = nullptr> static constexpr T identity() { return T{typename T::duration{0}}; } - template < - typename T, - typename std::enable_if_t() && !cudf::is_fixed_point()>* = nullptr> + template () && !cudf::is_fixed_point()>* = nullptr> static constexpr T identity() { return T{0}; } - template ()>* = nullptr> + template ()>* = nullptr> static constexpr T identity() { CUDF_FAIL("fixed_point does not yet support device operator identity"); @@ -93,13 +92,13 @@ struct DeviceSum { * @brief `count` operator - used in rolling windows */ struct DeviceCount { - template ()>* = nullptr> + template ()>* = nullptr> CUDF_HOST_DEVICE inline T operator()(const T& lhs, const T& rhs) { return T{DeviceCount{}(lhs.time_since_epoch(), rhs.time_since_epoch())}; } - template ()>* = nullptr> + template ()>* = nullptr> CUDF_HOST_DEVICE inline T operator()(const T&, const T& rhs) { return rhs + T{1}; @@ -123,10 +122,9 @@ struct DeviceMin { return numeric::detail::min(lhs, rhs); } - template < - typename T, - typename std::enable_if_t && !cudf::is_dictionary() && - !cudf::is_fixed_point()>* = nullptr> + template && !cudf::is_dictionary() && + !cudf::is_fixed_point()>* = nullptr> static constexpr T identity() { // chrono types do not have std::numeric_limits specializations and should use T::max() @@ -135,7 +133,7 @@ struct DeviceMin { return cuda::std::numeric_limits::max(); } - template ()>* = nullptr> + template ()>* = nullptr> static constexpr T identity() { CUDF_FAIL("fixed_point does not yet support DeviceMin identity"); @@ -143,13 +141,13 @@ struct DeviceMin { } // @brief identity specialized for string_view - template >* = nullptr> + template >* = nullptr> CUDF_HOST_DEVICE inline static constexpr T identity() { return string_view::max(); } - template ()>* = nullptr> + template ()>* = nullptr> static constexpr T identity() { return static_cast(T::max_value()); @@ -167,10 +165,9 @@ struct DeviceMax { return numeric::detail::max(lhs, rhs); } - template < - typename T, - typename std::enable_if_t && !cudf::is_dictionary() && - !cudf::is_fixed_point()>* = nullptr> + template && !cudf::is_dictionary() && + !cudf::is_fixed_point()>* = nullptr> static constexpr T identity() { // chrono types do not have std::numeric_limits specializations and should use T::min() @@ -179,20 +176,20 @@ struct DeviceMax { return cuda::std::numeric_limits::lowest(); } - template ()>* = nullptr> + template ()>* = nullptr> static constexpr T identity() { CUDF_FAIL("fixed_point does not yet support DeviceMax identity"); return cuda::std::numeric_limits::lowest(); } - template >* = nullptr> + template >* = nullptr> CUDF_HOST_DEVICE inline static constexpr T identity() { return string_view::min(); } - template ()>* = nullptr> + template ()>* = nullptr> static constexpr T identity() { return static_cast(T::lowest_value()); @@ -203,19 +200,19 @@ struct DeviceMax { * @brief binary `product` operator */ struct DeviceProduct { - template ()>* = nullptr> + template ()>* = nullptr> CUDF_HOST_DEVICE inline auto operator()(const T& lhs, const T& rhs) -> decltype(lhs * rhs) { return lhs * rhs; } - template ()>* = nullptr> + template ()>* = nullptr> static constexpr T identity() { return T{1}; } - template ()>* = nullptr> + template ()>* = nullptr> static constexpr T identity() { CUDF_FAIL("fixed_point does not yet support DeviceProduct identity"); @@ -227,7 +224,7 @@ struct DeviceProduct { * @brief binary `and` operator */ struct DeviceAnd { - 
template >* = nullptr> + template >* = nullptr> CUDF_HOST_DEVICE inline auto operator()(const T& lhs, const T& rhs) -> decltype(lhs & rhs) { return (lhs & rhs); @@ -238,7 +235,7 @@ struct DeviceAnd { * @brief binary `or` operator */ struct DeviceOr { - template >* = nullptr> + template >* = nullptr> CUDF_HOST_DEVICE inline auto operator()(const T& lhs, const T& rhs) -> decltype(lhs | rhs) { return (lhs | rhs); @@ -249,7 +246,7 @@ struct DeviceOr { * @brief binary `xor` operator */ struct DeviceXor { - template >* = nullptr> + template >* = nullptr> CUDF_HOST_DEVICE inline auto operator()(const T& lhs, const T& rhs) -> decltype(lhs ^ rhs) { return (lhs ^ rhs); diff --git a/cpp/include/cudf/io/text/byte_range_info.hpp b/cpp/include/cudf/io/text/byte_range_info.hpp new file mode 100644 index 00000000000..cb2d00f0d1f --- /dev/null +++ b/cpp/include/cudf/io/text/byte_range_info.hpp @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +#include +#include + +namespace cudf { +namespace io { +namespace text { + +/** + * @brief stores offset and size used to indicate a byte range + */ +class byte_range_info { + private: + int64_t _offset; + int64_t _size; + + public: + constexpr byte_range_info() noexcept : _offset(0), _size(0) {} + constexpr byte_range_info(int64_t offset, int64_t size) : _offset(offset), _size(size) + { + CUDF_EXPECTS(offset >= 0, "offset must be non-negative"); + CUDF_EXPECTS(size >= 0, "size must be non-negative"); + } + + constexpr byte_range_info(byte_range_info const& other) noexcept = default; + constexpr byte_range_info& operator=(byte_range_info const& other) noexcept = default; + + [[nodiscard]] constexpr int64_t offset() { return _offset; } + [[nodiscard]] constexpr int64_t size() { return _size; } +}; + +/** + * @brief Create a collection of consecutive ranges between [0, total_bytes). + * + * Each range wil be the same size except if `total_bytes` is not evenly divisible by + * `range_count`, in which case the last range size will be the remainder. + * + * @param total_bytes total number of bytes in all ranges + * @param range_count total number of ranges in which to divide bytes + * @return Vector of range objects + */ +std::vector create_byte_range_infos_consecutive(int64_t total_bytes, + int64_t range_count); + +/** + * @brief Create a byte_range_info which represents as much of a file as possible. Specifically, + * `[0, numeric_limit::max())`. + * + * @return `[0, numeric_limit::max())` + */ +byte_range_info create_byte_range_info_max(); + +} // namespace text +} // namespace io +} // namespace cudf diff --git a/cpp/include/cudf/io/text/data_chunk_source.hpp b/cpp/include/cudf/io/text/data_chunk_source.hpp index 5e6dda5a514..3499b86ab42 100644 --- a/cpp/include/cudf/io/text/data_chunk_source.hpp +++ b/cpp/include/cudf/io/text/data_chunk_source.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -36,6 +36,7 @@ namespace text { */ class device_data_chunk { public: + virtual ~device_data_chunk() = default; [[nodiscard]] virtual char const* data() const = 0; [[nodiscard]] virtual std::size_t size() const = 0; virtual operator device_span() const = 0; @@ -52,6 +53,9 @@ class device_data_chunk { */ class data_chunk_reader { public: + virtual ~data_chunk_reader() = default; + virtual void skip_bytes(std::size_t size) = 0; + /** * @brief Get the next chunk of bytes from the data source * @@ -76,6 +80,7 @@ class data_chunk_reader { */ class data_chunk_source { public: + virtual ~data_chunk_source() = default; [[nodiscard]] virtual std::unique_ptr create_reader() const = 0; }; diff --git a/cpp/include/cudf/io/text/data_chunk_source_factories.hpp b/cpp/include/cudf/io/text/data_chunk_source_factories.hpp index aeb4b7fff53..ffe159b59dc 100644 --- a/cpp/include/cudf/io/text/data_chunk_source_factories.hpp +++ b/cpp/include/cudf/io/text/data_chunk_source_factories.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -89,6 +89,8 @@ class istream_data_chunk_reader : public data_chunk_reader { } } + void skip_bytes(std::size_t size) override { _datastream->ignore(size); }; + std::unique_ptr get_next_chunk(std::size_t read_size, rmm::cuda_stream_view stream) override { @@ -143,6 +145,12 @@ class device_span_data_chunk_reader : public data_chunk_reader { public: device_span_data_chunk_reader(device_span data) : _data(data) {} + void skip_bytes(std::size_t read_size) override + { + if (read_size > _data.size() - _position) { read_size = _data.size() - _position; } + _position += read_size; + }; + std::unique_ptr get_next_chunk(std::size_t read_size, rmm::cuda_stream_view stream) override { diff --git a/cpp/include/cudf/io/text/detail/trie.hpp b/cpp/include/cudf/io/text/detail/trie.hpp index 06d15276a68..a908a9fa227 100644 --- a/cpp/include/cudf/io/text/detail/trie.hpp +++ b/cpp/include/cudf/io/text/detail/trie.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -89,20 +89,6 @@ struct trie_device_view { */ constexpr uint8_t get_match_length(uint16_t idx) { return _nodes[idx].match_length; } - /** - * @brief returns the longest matching state of any state in the multistate. - */ - template - constexpr uint8_t get_match_length(multistate const& states) - { - int8_t val = 0; - for (uint8_t i = 0; i < states.size(); i++) { - auto match_length = get_match_length(states.get_tail(i)); - if (match_length > val) { val = match_length; } - } - return val; - } - private: constexpr void transition_enqueue_all( // char c, diff --git a/cpp/include/cudf/io/text/multibyte_split.hpp b/cpp/include/cudf/io/text/multibyte_split.hpp index d42ee9f510e..25f7ef98a81 100644 --- a/cpp/include/cudf/io/text/multibyte_split.hpp +++ b/cpp/include/cudf/io/text/multibyte_split.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. 
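The byte_range_info header introduced above documents create_byte_range_infos_consecutive as dividing [0, total_bytes) into equally sized consecutive ranges, with the final range picking up any remainder. A standalone sketch of one plausible reading of that splitting rule (plain C++; range_sketch is a stand-in for the cudf class):

#include <algorithm>
#include <cstdint>
#include <iostream>
#include <vector>

struct range_sketch {  // stand-in for cudf::io::text::byte_range_info
  int64_t offset;
  int64_t size;
};

// Divide [0, total_bytes) into range_count consecutive ranges: every range gets
// the rounded-up chunk size except the last, which takes whatever remains.
std::vector<range_sketch> split_consecutive(int64_t total_bytes, int64_t range_count)
{
  auto const chunk = (total_bytes + range_count - 1) / range_count;  // ceiling division
  std::vector<range_sketch> ranges;
  for (int64_t i = 0; i < range_count; ++i) {
    auto const offset = i * chunk;
    auto const size   = std::max<int64_t>(0, std::min(chunk, total_bytes - offset));
    ranges.push_back({offset, size});
  }
  return ranges;
}

int main()
{
  for (auto const& r : split_consecutive(20, 3)) {  // prints [0, 7) [7, 14) [14, 20)
    std::cout << "[" << r.offset << ", " << r.offset + r.size << ")\n";
  }
  return 0;
}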
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,6 +17,7 @@ #pragma once #include +#include #include #include @@ -27,10 +28,53 @@ namespace cudf { namespace io { namespace text { +/** + * @brief Splits the source text into a strings column using a multiple byte delimiter. + * + * Providing a byte range allows multibyte_split to read a whole file, but only return the offsets + * of delimiters which begin within the range. If thinking in terms of "records", where each + * delimiter dictates the end of a record, all records which begin within the byte range provided + * will be returned, including any record which may begin in the range but end outside of the + * range. Records which begin outside of the range will ignored, even if those records end inside + * the range. + * + * @code{.pseudo} + * Examples: + * source: "abc..def..ghi..jkl.." + * delimiter: ".." + * + * byte_range: nullopt + * return: ["abc..", "def..", "ghi..", jkl..", ""] + * + * byte_range: [0, 2) + * return: ["abc.."] + * + * byte_range: [2, 9) + * return: ["def..", "ghi.."] + * + * byte_range: [11, 2) + * return: [] + * + * byte_range: [13, 7) + * return: ["jkl..", ""] + * @endcode + * + * @param source The source string + * @param delimiter UTF-8 encoded string for which to find offsets in the source + * @param byte_range range in which to consider offsets relevant + * @param mr Memory resource to use for the device memory allocation + * @return The strings found by splitting the source by the delimiter within the relevant byte + * range. + */ std::unique_ptr multibyte_split( data_chunk_source const& source, std::string const& delimiter, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + std::optional byte_range = std::nullopt, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +std::unique_ptr multibyte_split(data_chunk_source const& source, + std::string const& delimiter, + rmm::mr::device_memory_resource* mr); } // namespace text } // namespace io diff --git a/cpp/include/cudf/utilities/span.hpp b/cpp/include/cudf/utilities/span.hpp index 9ccd4d21682..1172a5a68cd 100644 --- a/cpp/include/cudf/utilities/span.hpp +++ b/cpp/include/cudf/utilities/span.hpp @@ -159,9 +159,9 @@ struct host_span : public cudf::detail::span_base, - void>::type* = nullptr> + std::enable_if_t<(Extent == OtherExtent || Extent == dynamic_extent) && + std::is_convertible_v, + void>* = nullptr> constexpr host_span(const host_span& other) noexcept : base(other.data(), other.size()) { @@ -220,9 +220,9 @@ struct device_span : public cudf::detail::span_base, - void>::type* = nullptr> + std::enable_if_t<(Extent == OtherExtent || Extent == dynamic_extent) && + std::is_convertible_v, + void>* = nullptr> constexpr device_span(const device_span& other) noexcept : base(other.data(), other.size()) { @@ -283,9 +283,9 @@ class base_2dspan { template typename OtherRowType, - typename std::enable_if, - RowType>, - void>::type* = nullptr> + std::enable_if_t, + RowType>, + void>* = nullptr> constexpr base_2dspan(base_2dspan const& other) noexcept : _data{other.data()}, _size{other.size()} { diff --git a/cpp/include/cudf/utilities/traits.hpp b/cpp/include/cudf/utilities/traits.hpp index f1ad11a9030..504ec6de405 100644 --- a/cpp/include/cudf/utilities/traits.hpp +++ b/cpp/include/cudf/utilities/traits.hpp @@ -676,13 +676,13 @@ constexpr inline bool is_nested(data_type type) template struct is_bit_castable_to_impl 
{ - template ()>* = nullptr> + template ()>* = nullptr> constexpr bool operator()() { return false; } - template ()>* = nullptr> + template ()>* = nullptr> constexpr bool operator()() { if (not cuda::std::is_trivially_copyable_v || @@ -696,13 +696,13 @@ struct is_bit_castable_to_impl { }; struct is_bit_castable_from_impl { - template ()>* = nullptr> + template ()>* = nullptr> constexpr bool operator()(data_type) { return false; } - template ()>* = nullptr> + template ()>* = nullptr> constexpr bool operator()(data_type to) { return cudf::type_dispatcher(to, is_bit_castable_to_impl{}); diff --git a/cpp/include/cudf_test/column_utilities.hpp b/cpp/include/cudf_test/column_utilities.hpp index aa77686fee4..cd96748f081 100644 --- a/cpp/include/cudf_test/column_utilities.hpp +++ b/cpp/include/cudf_test/column_utilities.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -178,7 +178,7 @@ bool validate_host_masks(std::vector const& expected_mask, * @return std::pair, std::vector> first is the * `column_view`'s data, and second is the column's bitmask. */ -template ()>* = nullptr> +template ()>* = nullptr> std::pair, std::vector> to_host(column_view c) { thrust::host_vector host_data(c.size()); @@ -197,7 +197,7 @@ std::pair, std::vector> to_host(column_view * @return std::pair, std::vector> first is the * `column_view`'s data, and second is the column's bitmask. */ -template ()>* = nullptr> +template ()>* = nullptr> std::pair, std::vector> to_host(column_view c) { using namespace numeric; diff --git a/cpp/include/cudf_test/column_wrapper.hpp b/cpp/include/cudf_test/column_wrapper.hpp index c190105e292..4005a4f9adc 100644 --- a/cpp/include/cudf_test/column_wrapper.hpp +++ b/cpp/include/cudf_test/column_wrapper.hpp @@ -93,31 +93,31 @@ class column_wrapper { template struct fixed_width_type_converter { // Are the types same - simply copy elements from [begin, end) to out - template , void>::type* = nullptr> + template , void>* = nullptr> constexpr ToT operator()(FromT element) const { return element; } // Are the types convertible or can target be constructed from source? 
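The is_bit_castable checks above allow a bit_cast between two fixed-width types only when both are trivially copyable and occupy the same number of bytes. A host-only mirror of that rule, with std:: traits standing in for the cuda::std:: ones used in the header:

#include <cstdint>
#include <cstring>
#include <type_traits>

// Two fixed-width types may be bit-cast only when both are trivially copyable
// and have the same size.
template <typename From, typename To>
constexpr bool is_bit_castable_v =
  std::is_trivially_copyable_v<From> && std::is_trivially_copyable_v<To> &&
  sizeof(From) == sizeof(To);

static_assert(is_bit_castable_v<int32_t, float>);    // 4 bytes <-> 4 bytes
static_assert(!is_bit_castable_v<int32_t, double>);  // sizes differ

int main()
{
  // the reinterpretation a bit_cast performs, spelled with memcpy
  float const f = 1.5f;
  int32_t bits{};
  std::memcpy(&bits, &f, sizeof(bits));
  return bits == 0x3FC00000 ? 0 : 1;
}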
- template && - (cudf::is_convertible::value || - std::is_constructible_v), - void>::type* = nullptr> + template < + typename FromT = From, + typename ToT = To, + std::enable_if_t && (cudf::is_convertible::value || + std::is_constructible_v), + void>* = nullptr> constexpr ToT operator()(FromT element) const { return static_cast(element); } // Convert integral values to timestamps - template && cudf::is_timestamp(), - void>::type* = nullptr> + template < + typename FromT = From, + typename ToT = To, + std::enable_if_t && cudf::is_timestamp(), void>* = nullptr> constexpr ToT operator()(FromT element) const { return ToT{typename ToT::duration{element}}; @@ -137,7 +137,7 @@ struct fixed_width_type_converter { template ()>* = nullptr> + std::enable_if_t()>* = nullptr> rmm::device_buffer make_elements(InputIterator begin, InputIterator end) { static_assert(cudf::is_fixed_width(), "Unexpected non-fixed width type."); @@ -162,8 +162,8 @@ rmm::device_buffer make_elements(InputIterator begin, InputIterator end) template () and - cudf::is_fixed_point()>* = nullptr> + std::enable_if_t() and + cudf::is_fixed_point()>* = nullptr> rmm::device_buffer make_elements(InputIterator begin, InputIterator end) { using RepType = typename ElementTo::rep; @@ -187,8 +187,8 @@ rmm::device_buffer make_elements(InputIterator begin, InputIterator end) template () and - cudf::is_fixed_point()>* = nullptr> + std::enable_if_t() and + cudf::is_fixed_point()>* = nullptr> rmm::device_buffer make_elements(InputIterator begin, InputIterator end) { using namespace numeric; diff --git a/cpp/include/cudf_test/type_lists.hpp b/cpp/include/cudf_test/type_lists.hpp index e84417c91d6..ac2892a0f34 100644 --- a/cpp/include/cudf_test/type_lists.hpp +++ b/cpp/include/cudf_test/type_lists.hpp @@ -80,9 +80,8 @@ constexpr auto types_to_ids() * @return Vector of TypeParam with the values specified */ template -typename std::enable_if() && - !cudf::is_timestamp_t::value, - thrust::host_vector>::type +std::enable_if_t() && !cudf::is_timestamp_t::value, + thrust::host_vector> make_type_param_vector(std::initializer_list const& init_list) { thrust::host_vector vec(init_list.size()); @@ -100,8 +99,7 @@ make_type_param_vector(std::initializer_list const& init_list) * @return Vector of TypeParam with the values specified */ template -typename std::enable_if::value, - thrust::host_vector>::type +std::enable_if_t::value, thrust::host_vector> make_type_param_vector(std::initializer_list const& init_list) { thrust::host_vector vec(init_list.size()); @@ -119,8 +117,7 @@ make_type_param_vector(std::initializer_list const& init_list) */ template -typename std::enable_if, - thrust::host_vector>::type +std::enable_if_t, thrust::host_vector> make_type_param_vector(std::initializer_list const& init_list) { thrust::host_vector vec(init_list.size()); diff --git a/cpp/src/binaryop/compiled/binary_ops.cu b/cpp/src/binaryop/compiled/binary_ops.cu index 995c6702cf8..c4538379836 100644 --- a/cpp/src/binaryop/compiled/binary_ops.cu +++ b/cpp/src/binaryop/compiled/binary_ops.cu @@ -119,9 +119,9 @@ struct compare_functor { // This is used to compare a scalar and a column value template - __device__ inline typename std::enable_if_t && - !std::is_same_v, - OutT> + __device__ inline std::enable_if_t && + !std::is_same_v, + OutT> operator()(cudf::size_type i) const { return cfunc_(lhs_dev_view_.is_valid(i), @@ -133,9 +133,9 @@ struct compare_functor { // This is used to compare a scalar and a column value template - __device__ inline typename std::enable_if_t && - 
std::is_same_v, - OutT> + __device__ inline std::enable_if_t && + std::is_same_v, + OutT> operator()(cudf::size_type i) const { return cfunc_(lhs_dev_view_.is_valid(), @@ -147,9 +147,9 @@ struct compare_functor { // This is used to compare 2 column values template - __device__ inline typename std::enable_if_t && - std::is_same_v, - OutT> + __device__ inline std::enable_if_t && + std::is_same_v, + OutT> operator()(cudf::size_type i) const { return cfunc_(lhs_dev_view_.is_valid(i), diff --git a/cpp/src/column/column_factories.cpp b/cpp/src/column/column_factories.cpp index fefe0b3c862..118a08ab26d 100644 --- a/cpp/src/column/column_factories.cpp +++ b/cpp/src/column/column_factories.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -31,21 +31,20 @@ namespace cudf { namespace { struct size_of_helper { cudf::data_type type; - template ()>* = nullptr> + template ()>* = nullptr> constexpr int operator()() const { CUDF_FAIL("Invalid, non fixed-width element type."); return 0; } - template () && not is_fixed_point()>* = nullptr> + template () && not is_fixed_point()>* = nullptr> constexpr int operator()() const noexcept { return sizeof(T); } - template ()>* = nullptr> + template ()>* = nullptr> constexpr int operator()() const noexcept { // Only want the sizeof fixed_point::Rep as fixed_point::scale is stored in data_type diff --git a/cpp/src/copying/concatenate.cu b/cpp/src/copying/concatenate.cu index 3412733f0b2..82e189b5a36 100644 --- a/cpp/src/copying/concatenate.cu +++ b/cpp/src/copying/concatenate.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -166,7 +166,7 @@ __global__ void fused_concatenate_kernel(column_device_view const* input_views, auto const output_size = output_view.size(); auto* output_data = output_view.data(); - size_type output_index = threadIdx.x + blockIdx.x * blockDim.x; + int64_t output_index = threadIdx.x + blockIdx.x * blockDim.x; size_type warp_valid_count = 0; unsigned active_mask; @@ -222,7 +222,7 @@ std::unique_ptr fused_concatenate(host_span views, auto const& d_offsets = std::get<2>(device_views); auto const output_size = std::get<3>(device_views); - CUDF_EXPECTS(output_size < static_cast(std::numeric_limits::max()), + CUDF_EXPECTS(output_size <= static_cast(std::numeric_limits::max()), "Total number of concatenated rows exceeds size_type range"); // Allocate output diff --git a/cpp/src/datetime/datetime_ops.cu b/cpp/src/datetime/datetime_ops.cu index 122ad4a9752..4dbe9faaa47 100644 --- a/cpp/src/datetime/datetime_ops.cu +++ b/cpp/src/datetime/datetime_ops.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
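The concatenate.cu hunk above widens the kernel's running output index from size_type to int64_t and relaxes the size check so an output of exactly numeric_limits<size_type>::max() rows is accepted. The reason is that a grid-stride loop keeps adding the stride to the flat thread index, so the running value can pass the 32-bit limit even though every valid row still fits. A short host-side illustration of that reasoning (values are made up):

#include <cstdint>
#include <iostream>
#include <limits>

int main()
{
  int64_t const output_size = std::numeric_limits<int32_t>::max();  // now permitted by <=
  int64_t const stride      = int64_t{256} * 1024;                  // blockDim.x * gridDim.x

  // a thread whose flat index lands near the end of the output
  int64_t index = output_size - 10;
  index += stride;  // fine in 64 bits: the loop simply terminates

  // the same bump on a 32-bit index would overflow (undefined behaviour),
  // which is why the kernel's index variable was widened to int64_t
  int32_t const narrow = static_cast<int32_t>(output_size - 10);

  std::cout << narrow << " near INT32_MAX; 64-bit index exits cleanly: " << (index > output_size)
            << '\n';
  return 0;
}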
@@ -278,14 +278,14 @@ struct launch_functor { launch_functor(column_view inp, mutable_column_view out) : input(inp), output(out) {} template - typename std::enable_if_t::value, void> operator()( + std::enable_if_t::value, void> operator()( rmm::cuda_stream_view stream) const { CUDF_FAIL("Cannot extract datetime component from non-timestamp column."); } template - typename std::enable_if_t::value, void> operator()( + std::enable_if_t::value, void> operator()( rmm::cuda_stream_view stream) const { thrust::transform(rmm::exec_policy(stream), @@ -326,18 +326,18 @@ std::unique_ptr apply_datetime_op(column_view const& column, struct add_calendrical_months_functor { template - typename std::enable_if_t::value, std::unique_ptr> - operator()(Args&&...) const + std::enable_if_t::value, std::unique_ptr> operator()( + Args&&...) const { CUDF_FAIL("Cannot extract datetime component from non-timestamp column."); } template - typename std::enable_if_t::value, std::unique_ptr> - operator()(column_view timestamp_column, - MonthIterator months_begin, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) const + std::enable_if_t::value, std::unique_ptr> operator()( + column_view timestamp_column, + MonthIterator months_begin, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) const { auto size = timestamp_column.size(); auto output_col_type = timestamp_column.type(); diff --git a/cpp/src/dictionary/detail/concatenate.cu b/cpp/src/dictionary/detail/concatenate.cu index 301338fa1a8..871a36f7d62 100644 --- a/cpp/src/dictionary/detail/concatenate.cu +++ b/cpp/src/dictionary/detail/concatenate.cu @@ -122,8 +122,7 @@ struct compute_children_offsets_fn { */ struct dispatch_compute_indices { template - typename std::enable_if_t(), - std::unique_ptr> + std::enable_if_t(), std::unique_ptr> operator()(column_view const& all_keys, column_view const& all_indices, column_view const& new_keys, @@ -184,8 +183,7 @@ struct dispatch_compute_indices { } template - typename std::enable_if_t(), - std::unique_ptr> + std::enable_if_t(), std::unique_ptr> operator()(Args&&...) { CUDF_FAIL("dictionary concatenate not supported for this column type"); diff --git a/cpp/src/dictionary/set_keys.cu b/cpp/src/dictionary/set_keys.cu index c1fb1fa2180..7783e5f8daf 100644 --- a/cpp/src/dictionary/set_keys.cu +++ b/cpp/src/dictionary/set_keys.cu @@ -50,8 +50,7 @@ namespace { */ struct dispatch_compute_indices { template - typename std::enable_if_t(), - std::unique_ptr> + std::enable_if_t(), std::unique_ptr> operator()(dictionary_column_view const& input, column_view const& new_keys, rmm::cuda_stream_view stream, @@ -100,8 +99,7 @@ struct dispatch_compute_indices { } template - typename std::enable_if_t(), - std::unique_ptr> + std::enable_if_t(), std::unique_ptr> operator()(Args&&...) { CUDF_FAIL("dictionary set_keys not supported for this column type"); diff --git a/cpp/src/filling/sequence.cu b/cpp/src/filling/sequence.cu index c49142f91f9..e5bffcf21c1 100644 --- a/cpp/src/filling/sequence.cu +++ b/cpp/src/filling/sequence.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -55,9 +55,8 @@ struct const_tabulator { * by init and step. 
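The sequence functor whose documentation continues below fills a column with init, init + step, init + 2*step, ... (and init + i when no step is given). A trivial host-side sketch of that tabulation, assuming a numeric element type:

#include <cstddef>
#include <vector>

// Fill a vector with init + i * step, mirroring what the device-side
// tabulating iterator computes per row.
template <typename T>
std::vector<T> make_sequence(std::size_t size, T init, T step)
{
  std::vector<T> out(size);
  for (std::size_t i = 0; i < size; ++i) { out[i] = init + static_cast<T>(i) * step; }
  return out;
}

int main()
{
  auto const seq = make_sequence<int>(5, 10, 3);  // 10, 13, 16, 19, 22
  return seq[4] == 22 ? 0 : 1;
}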
*/ struct sequence_functor { - template < - typename T, - typename std::enable_if_t() and not cudf::is_boolean()>* = nullptr> + template () and not cudf::is_boolean()>* = nullptr> std::unique_ptr operator()(size_type size, scalar const& init, scalar const& step, @@ -83,9 +82,8 @@ struct sequence_functor { return result; } - template < - typename T, - typename std::enable_if_t() and not cudf::is_boolean()>* = nullptr> + template () and not cudf::is_boolean()>* = nullptr> std::unique_ptr operator()(size_type size, scalar const& init, rmm::cuda_stream_view stream, diff --git a/cpp/src/groupby/sort/group_nunique.cu b/cpp/src/groupby/sort/group_nunique.cu index 5154c867095..37d13d5aea3 100644 --- a/cpp/src/groupby/sort/group_nunique.cu +++ b/cpp/src/groupby/sort/group_nunique.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -33,14 +33,14 @@ namespace detail { namespace { struct nunique_functor { template - typename std::enable_if_t(), std::unique_ptr> - operator()(column_view const& values, - cudf::device_span group_labels, - size_type const num_groups, - cudf::device_span group_offsets, - null_policy null_handling, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + std::enable_if_t(), std::unique_ptr> operator()( + column_view const& values, + cudf::device_span group_labels, + size_type const num_groups, + cudf::device_span group_offsets, + null_policy null_handling, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { auto result = make_numeric_column( data_type(type_to_id()), num_groups, mask_state::UNALLOCATED, stream, mr); @@ -94,8 +94,8 @@ struct nunique_functor { } template - typename std::enable_if_t(), std::unique_ptr> - operator()(Args&&...) + std::enable_if_t(), std::unique_ptr> operator()( + Args&&...) { CUDF_FAIL("list_view group_nunique not supported yet"); } diff --git a/cpp/src/groupby/sort/group_tdigest.cu b/cpp/src/groupby/sort/group_tdigest.cu index f48ab852f24..f726de9bf3c 100644 --- a/cpp/src/groupby/sort/group_tdigest.cu +++ b/cpp/src/groupby/sort/group_tdigest.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -707,9 +707,8 @@ struct get_scalar_minmax { }; struct typed_group_tdigest { - template < - typename T, - typename std::enable_if_t() || cudf::is_fixed_point()>* = nullptr> + template () || cudf::is_fixed_point()>* = nullptr> std::unique_ptr operator()(column_view const& col, cudf::device_span group_offsets, cudf::device_span group_labels, @@ -766,10 +765,9 @@ struct typed_group_tdigest { mr); } - template < - typename T, - typename... Args, - typename std::enable_if_t() && !cudf::is_fixed_point()>* = nullptr> + template () && !cudf::is_fixed_point()>* = nullptr> std::unique_ptr operator()(Args&&...) { CUDF_FAIL("Non-numeric type in group_tdigest"); diff --git a/cpp/src/io/csv/csv_gpu.cu b/cpp/src/io/csv/csv_gpu.cu index 13f5a57ac1f..e2e478af9ef 100644 --- a/cpp/src/io/csv/csv_gpu.cu +++ b/cpp/src/io/csv/csv_gpu.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -375,8 +375,8 @@ struct decode_op { * @return bool Whether the parsed value is valid. */ template and !std::is_same_v and - !cudf::is_fixed_point()>* = nullptr> + std::enable_if_t and !std::is_same_v and + !cudf::is_fixed_point()>* = nullptr> __host__ __device__ __forceinline__ bool operator()(void* out_buffer, size_t row, const data_type, @@ -402,7 +402,7 @@ struct decode_op { * * @return bool Whether the parsed value is valid. */ - template ()>* = nullptr> + template ()>* = nullptr> __host__ __device__ __forceinline__ bool operator()(void* out_buffer, size_t row, const data_type output_type, @@ -423,7 +423,7 @@ struct decode_op { /** * @brief Dispatch for boolean type types. */ - template >* = nullptr> + template >* = nullptr> __host__ __device__ __forceinline__ bool operator()(void* out_buffer, size_t row, const data_type, @@ -447,7 +447,7 @@ struct decode_op { * @brief Dispatch for floating points, which are set to NaN if the input * is not valid. In such case, the validity mask is set to zero too. */ - template >* = nullptr> + template >* = nullptr> __host__ __device__ __forceinline__ bool operator()(void* out_buffer, size_t row, const data_type, @@ -466,8 +466,8 @@ struct decode_op { * @brief Dispatch for all other types. */ template and !std::is_floating_point_v and - !cudf::is_fixed_point()>* = nullptr> + std::enable_if_t and !std::is_floating_point_v and + !cudf::is_fixed_point()>* = nullptr> __host__ __device__ __forceinline__ bool operator()(void* out_buffer, size_t row, const data_type, diff --git a/cpp/src/io/json/json_gpu.cu b/cpp/src/io/json/json_gpu.cu index 5cf0b03a6f1..21455e3ab93 100644 --- a/cpp/src/io/json/json_gpu.cu +++ b/cpp/src/io/json/json_gpu.cu @@ -216,7 +216,7 @@ struct ConvertFunctor { * It is handled here rather than within convertStrToValue() as that function * is used by other types (ex. timestamp) that aren't 'booleable'. */ - template >* = nullptr> + template >* = nullptr> __host__ __device__ __forceinline__ bool operator()(char const* begin, char const* end, void* output_column, @@ -240,7 +240,7 @@ struct ConvertFunctor { * @brief Dispatch for floating points, which are set to NaN if the input * is not valid. In such case, the validity mask is set to zero too. */ - template >* = nullptr> + template >* = nullptr> __host__ __device__ __forceinline__ bool operator()(char const* begin, char const* end, void* out_buffer, @@ -257,9 +257,8 @@ struct ConvertFunctor { * @brief Default template operator() dispatch specialization all data types * (including wrapper types) that is not covered by above. 
*/ - template < - typename T, - typename std::enable_if_t and !std::is_integral_v>* = nullptr> + template and !std::is_integral_v>* = nullptr> __host__ __device__ __forceinline__ bool operator()(char const* begin, char const* end, void* output_column, diff --git a/cpp/src/io/orc/orc.h b/cpp/src/io/orc/orc.h index 386e3d8d73a..47020023419 100644 --- a/cpp/src/io/orc/orc.h +++ b/cpp/src/io/orc/orc.h @@ -137,56 +137,51 @@ int inline constexpr encode_field_number(int field_number, ProtofType field_type } namespace { -template < - typename base_t, - typename std::enable_if_t and !std::is_enum_v>* = nullptr> +template and !std::is_enum_v>* = nullptr> int static constexpr encode_field_number_base(int field_number) noexcept { return encode_field_number(field_number, ProtofType::FIXEDLEN); } -template < - typename base_t, - typename std::enable_if_t or std::is_enum_v>* = nullptr> +template or std::is_enum_v>* = nullptr> int static constexpr encode_field_number_base(int field_number) noexcept { return encode_field_number(field_number, ProtofType::VARINT); } -template >* = nullptr> +template >* = nullptr> int static constexpr encode_field_number_base(int field_number) noexcept { return encode_field_number(field_number, ProtofType::FIXED32); } -template >* = nullptr> +template >* = nullptr> int static constexpr encode_field_number_base(int field_number) noexcept { return encode_field_number(field_number, ProtofType::FIXED64); } }; // namespace -template < - typename T, - typename std::enable_if_t or std::is_same_v>* = nullptr> +template or std::is_same_v>* = nullptr> int constexpr encode_field_number(int field_number) noexcept { return encode_field_number_base(field_number); } // containters change the field number encoding -template < - typename T, - typename std::enable_if_t>>* = nullptr> +template >>* = nullptr> int constexpr encode_field_number(int field_number) noexcept { return encode_field_number_base(field_number); } // optional fields don't change the field number encoding -template < - typename T, - typename std::enable_if_t>>* = nullptr> +template >>* = nullptr> int constexpr encode_field_number(int field_number) noexcept { return encode_field_number_base(field_number); @@ -244,19 +239,19 @@ class ProtobufReader { uint32_t read_field_size(const uint8_t* end); - template >* = nullptr> + template >* = nullptr> void read_field(T& value, const uint8_t* end) { value = get(); } - template >* = nullptr> + template >* = nullptr> void read_field(T& value, const uint8_t* end) { value = static_cast(get()); } - template >* = nullptr> + template >* = nullptr> void read_field(T& value, const uint8_t* end) { auto const size = read_field_size(end); @@ -264,8 +259,7 @@ class ProtobufReader { m_cur += size; } - template >>* = nullptr> + template >>* = nullptr> void read_field(T& value, const uint8_t* end) { auto const size = read_field_size(end); @@ -273,10 +267,9 @@ class ProtobufReader { m_cur += size; } - template < - typename T, - typename std::enable_if_t> and - !std::is_same_v>* = nullptr> + template > and + !std::is_same_v>* = nullptr> void read_field(T& value, const uint8_t* end) { auto const size = read_field_size(end); @@ -284,9 +277,8 @@ class ProtobufReader { read(value.back(), size); } - template < - typename T, - typename std::enable_if_t>>* = nullptr> + template >>* = nullptr> void read_field(T& value, const uint8_t* end) { typename T::value_type contained_value; @@ -301,7 +293,7 @@ class ProtobufReader { read(value, size); } - template >* = nullptr> + template >* = nullptr> void read_field(T& 
value, const uint8_t* end) { memcpy(&value, m_cur, sizeof(T)); diff --git a/cpp/src/io/text/byte_range_info.cpp b/cpp/src/io/text/byte_range_info.cpp new file mode 100644 index 00000000000..290e0451839 --- /dev/null +++ b/cpp/src/io/text/byte_range_info.cpp @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +#include + +namespace cudf { +namespace io { +namespace text { + +byte_range_info create_byte_range_info_max() { return {0, std::numeric_limits::max()}; } + +std::vector create_byte_range_infos_consecutive(int64_t total_bytes, + int64_t range_count) +{ + auto range_size = util::div_rounding_up_safe(total_bytes, range_count); + auto ranges = std::vector(); + + ranges.reserve(range_size); + + for (int64_t i = 0; i < range_count; i++) { + auto offset = i * range_size; + auto size = std::min(range_size, total_bytes - offset); + ranges.emplace_back(offset, size); + } + + return ranges; +} + +} // namespace text +} // namespace io +} // namespace cudf diff --git a/cpp/src/io/text/multibyte_split.cu b/cpp/src/io/text/multibyte_split.cu index d287b9f2419..99f3bde3bf6 100644 --- a/cpp/src/io/text/multibyte_split.cu +++ b/cpp/src/io/text/multibyte_split.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
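// [editor's note — illustrative sketch, not part of this patch] The new
// create_byte_range_infos_consecutive() above splits total_bytes into range_count
// consecutive (offset, size) ranges using a ceiling division for the range size. The
// standalone program below mirrors that splitting logic with plain integers so it can be
// compiled without cudf; std::pair stands in for byte_range_info, and the helper name
// split_consecutive is invented for illustration only.
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <utility>
#include <vector>

std::vector<std::pair<std::int64_t, std::int64_t>> split_consecutive(std::int64_t total_bytes,
                                                                     std::int64_t range_count)
{
  // ceiling division, standing in for cudf::util::div_rounding_up_safe used in the patch
  auto const range_size = (total_bytes + range_count - 1) / range_count;

  std::vector<std::pair<std::int64_t, std::int64_t>> ranges;
  // note: range_count elements are pushed (the patch reserves range_size instead)
  ranges.reserve(range_count);

  for (std::int64_t i = 0; i < range_count; i++) {
    auto const offset = i * range_size;
    // the final range is shortened so the ranges cover exactly total_bytes
    ranges.emplace_back(offset, std::min(range_size, total_bytes - offset));
  }
  return ranges;
}

int main()
{
  // 100 bytes split three ways -> (0,34) (34,34) (68,32); only the last range is short
  for (auto const& [offset, size] : split_consecutive(100, 3)) {
    std::cout << "offset=" << offset << " size=" << size << '\n';
  }
}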
@@ -18,16 +18,24 @@ #include #include #include +#include #include #include #include #include #include +#include #include #include +#include #include +#include +#include +#include +#include + #include #include @@ -96,7 +104,7 @@ __global__ void multibyte_split_init_kernel( cudf::size_type base_tile_idx, cudf::size_type num_tiles, cudf::io::text::detail::scan_tile_state_view tile_multistates, - cudf::io::text::detail::scan_tile_state_view tile_output_offsets, + cudf::io::text::detail::scan_tile_state_view tile_output_offsets, cudf::io::text::detail::scan_tile_status status = cudf::io::text::detail::scan_tile_status::invalid) { @@ -110,7 +118,7 @@ __global__ void multibyte_split_init_kernel( __global__ void multibyte_split_seed_kernel( cudf::io::text::detail::scan_tile_state_view tile_multistates, - cudf::io::text::detail::scan_tile_state_view tile_output_offsets, + cudf::io::text::detail::scan_tile_state_view tile_output_offsets, multistate tile_multistate_seed, uint32_t tile_output_offset) { @@ -124,17 +132,15 @@ __global__ void multibyte_split_seed_kernel( __global__ void multibyte_split_kernel( cudf::size_type base_tile_idx, cudf::io::text::detail::scan_tile_state_view tile_multistates, - cudf::io::text::detail::scan_tile_state_view tile_output_offsets, + cudf::io::text::detail::scan_tile_state_view tile_output_offsets, cudf::io::text::detail::trie_device_view trie, - int32_t chunk_input_offset, cudf::device_span chunk_input_chars, - cudf::device_span abs_output_delimiter_offsets, - cudf::device_span abs_output_chars) + cudf::device_span abs_output_delimiter_offsets) { using InputLoad = cub::BlockLoad; - using OffsetScan = cub::BlockScan; - using OffsetScanCallback = cudf::io::text::detail::scan_tile_state_callback; + using OffsetScan = cub::BlockScan; + using OffsetScanCallback = cudf::io::text::detail::scan_tile_state_callback; __shared__ union { typename InputLoad::TempStorage input_load; @@ -166,7 +172,7 @@ __global__ void multibyte_split_kernel( // STEP 3: Flag matches - uint32_t thread_offsets[ITEMS_PER_THREAD]; + int64_t thread_offsets[ITEMS_PER_THREAD]; for (int32_t i = 0; i < ITEMS_PER_THREAD; i++) { thread_offsets[i] = i < thread_input_size and trie.is_match(thread_states[i]); @@ -182,16 +188,11 @@ __global__ void multibyte_split_kernel( // Step 5: Assign outputs from each thread using match offsets. 
- if (abs_output_chars.size() > 0) { - for (int32_t i = 0; i < ITEMS_PER_THREAD and i < thread_input_size; i++) { - abs_output_chars[chunk_input_offset + thread_input_offset + i] = thread_chars[i]; - } - } - if (abs_output_delimiter_offsets.size() > 0) { for (int32_t i = 0; i < ITEMS_PER_THREAD and i < thread_input_size; i++) { if (trie.is_match(thread_states[i])) { - auto const match_end = base_tile_idx * ITEMS_PER_TILE + thread_input_offset + i + 1; + auto const match_end = + static_cast(base_tile_idx) * ITEMS_PER_TILE + thread_input_offset + i + 1; abs_output_delimiter_offsets[thread_offsets[i]] = match_end; } } @@ -236,17 +237,16 @@ std::vector get_streams(int32_t count, rmm::cuda_stream_p return streams; } -cudf::size_type multibyte_split_scan_full_source(cudf::io::text::data_chunk_source const& source, - cudf::io::text::detail::trie const& trie, - scan_tile_state& tile_multistates, - scan_tile_state& tile_offsets, - device_span output_buffer, - device_span output_char_buffer, - rmm::cuda_stream_view stream, - std::vector const& streams) +int64_t multibyte_split_scan_full_source(cudf::io::text::data_chunk_source const& source, + cudf::io::text::detail::trie const& trie, + scan_tile_state& tile_multistates, + scan_tile_state& tile_offsets, + device_span output_buffer, + rmm::cuda_stream_view stream, + std::vector const& streams) { CUDF_FUNC_RANGE(); - cudf::size_type chunk_offset = 0; + int64_t chunk_offset = 0; multibyte_split_init_kernel<<>>( // -TILES_PER_CHUNK, @@ -298,14 +298,14 @@ cudf::size_type multibyte_split_scan_full_source(cudf::io::text::data_chunk_sour tile_multistates, tile_offsets, trie.view(), - chunk_offset, *chunk, - output_buffer, - output_char_buffer); + output_buffer); cudaEventRecord(last_launch_event, chunk_stream); chunk_offset += chunk->size(); + + chunk.reset(); } cudaEventDestroy(last_launch_event); @@ -317,6 +317,7 @@ cudf::size_type multibyte_split_scan_full_source(cudf::io::text::data_chunk_sour std::unique_ptr multibyte_split(cudf::io::text::data_chunk_source const& source, std::string const& delimiter, + byte_range_info byte_range, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr, rmm::cuda_stream_pool& stream_pool) @@ -336,7 +337,7 @@ std::unique_ptr multibyte_split(cudf::io::text::data_chunk_source // best when at least 32 more than max possible concurrent tiles, due to rolling `invalid`s auto num_tile_states = std::max(32, TILES_PER_CHUNK * concurrency + 32); auto tile_multistates = scan_tile_state(num_tile_states, stream); - auto tile_offsets = scan_tile_state(num_tile_states, stream); + auto tile_offsets = scan_tile_state(num_tile_states, stream); auto streams = get_streams(concurrency, stream_pool); @@ -345,52 +346,104 @@ std::unique_ptr multibyte_split(cudf::io::text::data_chunk_source trie, tile_multistates, tile_offsets, - cudf::device_span(static_cast(nullptr), 0), - cudf::device_span(static_cast(nullptr), 0), + cudf::device_span(static_cast(nullptr), 0), stream, streams); // allocate results - auto num_tiles = cudf::util::div_rounding_up_safe(bytes_total, ITEMS_PER_TILE); - auto num_results = tile_offsets.get_inclusive_prefix(num_tiles - 1, stream); - auto string_offsets = rmm::device_uvector(num_results + 2, stream, mr); - auto string_chars = rmm::device_uvector(bytes_total, stream, mr); + auto num_tiles = + cudf::util::div_rounding_up_safe(bytes_total, static_cast(ITEMS_PER_TILE)); + auto num_results = tile_offsets.get_inclusive_prefix(num_tiles - 1, stream); + + auto string_offsets = rmm::device_uvector(num_results + 2, 
stream); // first and last element are set manually to zero and size of input, respectively. // kernel is only responsible for determining delimiter offsets - auto string_count = static_cast(string_offsets.size() - 1); string_offsets.set_element_to_zero_async(0, stream); - string_offsets.set_element_async(string_count, bytes_total, stream); + string_offsets.set_element_async(string_offsets.size() - 1, bytes_total, stream); + + // kernel needs to find first and last relevant offset., as well as count of relevant offsets. multibyte_split_scan_full_source( source, trie, tile_multistates, tile_offsets, - cudf::device_span(string_offsets).subspan(1, num_results), - string_chars, + cudf::device_span(string_offsets).subspan(1, num_results), stream, streams); + auto relevant_offsets_begin = thrust::lower_bound(rmm::exec_policy(stream), + string_offsets.begin(), + string_offsets.end() - 1, + byte_range.offset()); + + auto relevant_offsets_end = thrust::upper_bound(rmm::exec_policy(stream), + string_offsets.begin(), + string_offsets.end() - 1, + byte_range.offset() + byte_range.size()) + + 1; + + auto string_offsets_out_size = relevant_offsets_end - relevant_offsets_begin; + + auto string_offsets_out = rmm::device_uvector(string_offsets_out_size, stream, mr); + + auto relevant_offset_first = + string_offsets.element(relevant_offsets_begin - string_offsets.begin(), stream); + auto relevant_offset_last = + string_offsets.element(relevant_offsets_end - string_offsets.begin() - 1, stream); + + auto string_chars_size = relevant_offset_last - relevant_offset_first; + auto string_chars = rmm::device_uvector(string_chars_size, stream, mr); + + // copy relevant offsets and adjust them to be zero-based. + thrust::transform(rmm::exec_policy(stream), + relevant_offsets_begin, + relevant_offsets_end, + string_offsets_out.begin(), + [relevant_offset_first] __device__(int64_t offset) { + return static_cast(offset - relevant_offset_first); + }); + + auto reader = source.create_reader(); + reader->skip_bytes(relevant_offset_first); + + auto relevant_bytes = reader->get_next_chunk(string_chars_size, stream); + + thrust::copy(rmm::exec_policy(stream), + relevant_bytes->data(), // + relevant_bytes->data() + relevant_bytes->size(), + string_chars.begin()); + + auto string_count = string_offsets_out.size() - 1; + return cudf::make_strings_column( - string_count, std::move(string_offsets), std::move(string_chars)); + string_count, std::move(string_offsets_out), std::move(string_chars)); } } // namespace detail std::unique_ptr multibyte_split(cudf::io::text::data_chunk_source const& source, std::string const& delimiter, + std::optional byte_range, rmm::mr::device_memory_resource* mr) { auto stream = rmm::cuda_stream_default; auto stream_pool = rmm::cuda_stream_pool(2); - auto result = detail::multibyte_split(source, delimiter, stream, mr, stream_pool); - stream.synchronize(); + auto result = detail::multibyte_split( + source, delimiter, byte_range.value_or(create_byte_range_info_max()), stream, mr, stream_pool); return result; } +std::unique_ptr multibyte_split(cudf::io::text::data_chunk_source const& source, + std::string const& delimiter, + rmm::mr::device_memory_resource* mr) +{ + return multibyte_split(source, delimiter, std::nullopt, mr); +} + } // namespace text } // namespace io } // namespace cudf diff --git a/cpp/src/io/utilities/parsing_utils.cuh b/cpp/src/io/utilities/parsing_utils.cuh index d1b2e2862c6..74b98eff010 100644 --- a/cpp/src/io/utilities/parsing_utils.cuh +++ b/cpp/src/io/utilities/parsing_utils.cuh 
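// [editor's note — illustrative sketch, not part of this patch] Rough illustration of how
// the new optional byte_range parameter of multibyte_split is meant to be used: each call
// returns only the records whose terminating delimiter falls inside its byte range, so N
// calls over consecutive ranges cover a file exactly once. make_source_from_file and the
// defaulted memory-resource argument are assumed from the public text-I/O headers touched
// by this patch and may differ slightly in this version.
#include <cudf/column/column.hpp>
#include <cudf/io/text/byte_range_info.hpp>
#include <cudf/io/text/data_chunk_source_factories.hpp>
#include <cudf/io/text/multibyte_split.hpp>

#include <cstdint>
#include <filesystem>
#include <memory>
#include <string>
#include <vector>

std::vector<std::unique_ptr<cudf::column>> split_file_by_ranges(std::string const& path,
                                                                std::int64_t num_ranges)
{
  auto const source = cudf::io::text::make_source_from_file(path);
  auto const total  = static_cast<std::int64_t>(std::filesystem::file_size(path));

  std::vector<std::unique_ptr<cudf::column>> parts;
  for (auto const& range : cudf::io::text::create_byte_range_infos_consecutive(total, num_ranges)) {
    // passing std::nullopt (or using the overload without a byte_range) keeps the old
    // whole-file behaviour
    parts.push_back(cudf::io::text::multibyte_split(*source, "\n", range));
  }
  return parts;
}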
@@ -98,7 +98,7 @@ struct parse_options { * * @return uint8_t Numeric value of the character, or `0` */ -template >* = nullptr> +template >* = nullptr> constexpr uint8_t decode_digit(char c, bool* valid_flag) { if (c >= '0' && c <= '9') return c - '0'; @@ -119,7 +119,7 @@ constexpr uint8_t decode_digit(char c, bool* valid_flag) * * @return uint8_t Numeric value of the character, or `0` */ -template >* = nullptr> +template >* = nullptr> constexpr uint8_t decode_digit(char c, bool* valid_flag) { if (c >= '0' && c <= '9') return c - '0'; diff --git a/cpp/src/reductions/scan/scan_exclusive.cu b/cpp/src/reductions/scan/scan_exclusive.cu index bf9b06a3602..9811a986224 100644 --- a/cpp/src/reductions/scan/scan_exclusive.cu +++ b/cpp/src/reductions/scan/scan_exclusive.cu @@ -50,7 +50,7 @@ struct scan_dispatcher { * @param mr Device memory resource used to allocate the returned column's device memory * @return Output column with scan results */ - template >* = nullptr> + template >* = nullptr> std::unique_ptr operator()(column_view const& input, null_policy, rmm::cuda_stream_view stream, diff --git a/cpp/src/reductions/scan/scan_inclusive.cu b/cpp/src/reductions/scan/scan_inclusive.cu index bf2c83b5b8d..ee865f09f2e 100644 --- a/cpp/src/reductions/scan/scan_inclusive.cu +++ b/cpp/src/reductions/scan/scan_inclusive.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -225,7 +225,7 @@ struct scan_dispatcher { * * @tparam T type of input column */ - template ()>* = nullptr> + template ()>* = nullptr> std::unique_ptr operator()(column_view const& input, null_policy, rmm::cuda_stream_view stream, diff --git a/cpp/src/reductions/simple.cuh b/cpp/src/reductions/simple.cuh index 7dc8e6cb2c4..e5303246452 100644 --- a/cpp/src/reductions/simple.cuh +++ b/cpp/src/reductions/simple.cuh @@ -260,8 +260,7 @@ struct same_element_type_dispatcher { return !(cudf::is_dictionary() || std::is_same_v); } - template ()>* = nullptr> + template ()>* = nullptr> std::unique_ptr resolve_key(column_view const& keys, scalar const& keys_index, rmm::cuda_stream_view stream, @@ -271,8 +270,7 @@ struct same_element_type_dispatcher { return cudf::detail::get_element(keys, index.value(stream), stream, mr); } - template ()>* = nullptr> + template ()>* = nullptr> std::unique_ptr resolve_key(column_view const&, scalar const&, rmm::cuda_stream_view, @@ -353,7 +351,7 @@ struct element_type_dispatcher { * @brief Specialization for reducing floating-point column types to any output type. */ template >* = nullptr> + std::enable_if_t>* = nullptr> std::unique_ptr reduce_numeric(column_view const& col, data_type const output_type, rmm::cuda_stream_view stream, @@ -375,8 +373,7 @@ struct element_type_dispatcher { /** * @brief Specialization for reducing integer column types to any output type. */ - template >* = nullptr> + template >* = nullptr> std::unique_ptr reduce_numeric(column_view const& col, data_type const output_type, rmm::cuda_stream_view stream, @@ -405,8 +402,7 @@ struct element_type_dispatcher { * @param stream CUDA stream used for device memory operations and kernel launches. 
* @param mr Device memory resource used to allocate the returned scalar's device memory */ - template ()>* = nullptr> + template ()>* = nullptr> std::unique_ptr operator()(column_view const& col, data_type const output_type, rmm::cuda_stream_view stream, @@ -423,8 +419,7 @@ struct element_type_dispatcher { /** * @brief Specialization for reducing fixed_point column types to fixed_point number */ - template ()>* = nullptr> + template ()>* = nullptr> std::unique_ptr operator()(column_view const& col, data_type const output_type, rmm::cuda_stream_view stream, @@ -436,8 +431,8 @@ struct element_type_dispatcher { } template () and - not cudf::is_fixed_point()>* = nullptr> + std::enable_if_t() and + not cudf::is_fixed_point()>* = nullptr> std::unique_ptr operator()(column_view const&, data_type const, rmm::cuda_stream_view, diff --git a/cpp/src/replace/nulls.cu b/cpp/src/replace/nulls.cu index 93bc6cf5ae5..d41bdb6ca5a 100644 --- a/cpp/src/replace/nulls.cu +++ b/cpp/src/replace/nulls.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -297,8 +297,7 @@ struct replace_nulls_functor { * `replace_nulls` with the appropriate data types. */ struct replace_nulls_scalar_kernel_forwarder { - template ()>* = nullptr> + template ()>* = nullptr> std::unique_ptr operator()(cudf::column_view const& input, cudf::scalar const& replacement, rmm::cuda_stream_view stream, diff --git a/cpp/src/reshape/interleave_columns.cu b/cpp/src/reshape/interleave_columns.cu index 0e3ead3fd99..cd66cad392e 100644 --- a/cpp/src/reshape/interleave_columns.cu +++ b/cpp/src/reshape/interleave_columns.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -53,7 +53,7 @@ struct interleave_columns_functor { }; template -struct interleave_columns_impl>> { +struct interleave_columns_impl>> { std::unique_ptr operator()(table_view const& lists_columns, bool create_mask, rmm::cuda_stream_view stream, @@ -64,7 +64,7 @@ struct interleave_columns_impl -struct interleave_columns_impl>> { +struct interleave_columns_impl>> { std::unique_ptr operator()(table_view const& structs_columns, bool create_mask, rmm::cuda_stream_view stream, @@ -131,7 +131,7 @@ struct interleave_columns_impl -struct interleave_columns_impl>> { +struct interleave_columns_impl>> { std::unique_ptr operator()(table_view const& strings_columns, bool create_mask, rmm::cuda_stream_view stream, @@ -214,7 +214,7 @@ struct interleave_columns_impl -struct interleave_columns_impl()>> { +struct interleave_columns_impl()>> { std::unique_ptr operator()(table_view const& input, bool create_mask, rmm::cuda_stream_view stream, diff --git a/cpp/src/rolling/rolling_detail.cuh b/cpp/src/rolling/rolling_detail.cuh index 7c52856b147..958da04e57c 100644 --- a/cpp/src/rolling/rolling_detail.cuh +++ b/cpp/src/rolling/rolling_detail.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -91,14 +91,14 @@ struct DeviceRolling { // operations we do support template - DeviceRolling(size_type _min_periods, typename std::enable_if_t()>* = nullptr) + DeviceRolling(size_type _min_periods, std::enable_if_t()>* = nullptr) : min_periods(_min_periods) { } // operations we don't support template - DeviceRolling(size_type _min_periods, typename std::enable_if_t()>* = nullptr) + DeviceRolling(size_type _min_periods, std::enable_if_t()>* = nullptr) : min_periods(_min_periods) { CUDF_FAIL("Invalid aggregation/type pair"); @@ -441,12 +441,12 @@ struct DeviceRollingLead { return cudf::is_fixed_width(); } - template ()>* = nullptr> + template ()>* = nullptr> DeviceRollingLead(size_type _row_offset) : row_offset(_row_offset) { } - template ()>* = nullptr> + template ()>* = nullptr> DeviceRollingLead(size_type _row_offset) : row_offset(_row_offset) { CUDF_FAIL("Invalid aggregation/type pair"); @@ -497,12 +497,12 @@ struct DeviceRollingLag { return cudf::is_fixed_width(); } - template ()>* = nullptr> + template ()>* = nullptr> DeviceRollingLag(size_type _row_offset) : row_offset(_row_offset) { } - template ()>* = nullptr> + template ()>* = nullptr> DeviceRollingLag(size_type _row_offset) : row_offset(_row_offset) { CUDF_FAIL("Invalid aggregation/type pair"); diff --git a/cpp/src/round/round.cu b/cpp/src/round/round.cu index 81bf03f7c0a..9a2b1002997 100644 --- a/cpp/src/round/round.cu +++ b/cpp/src/round/round.cu @@ -49,26 +49,26 @@ inline double __device__ generic_round_half_even(double d) { return rint(d); } inline float __device__ generic_modf(float a, float* b) { return modff(a, b); } inline double __device__ generic_modf(double a, double* b) { return modf(a, b); } -template >* = nullptr> +template >* = nullptr> T __device__ generic_abs(T value) { return numeric::detail::abs(value); } -template >* = nullptr> +template >* = nullptr> T __device__ generic_abs(T value) { return value; } -template >* = nullptr> +template >* = nullptr> int16_t __device__ generic_sign(T value) { return value < 0 ? -1 : 1; } // this is needed to suppress warning: pointless comparison of unsigned integer with zero -template >* = nullptr> +template >* = nullptr> int16_t __device__ generic_sign(T) { return 1; @@ -83,13 +83,13 @@ constexpr inline auto is_supported_round_type() template struct half_up_zero { T n; // unused in the decimal_places = 0 case - template ()>* = nullptr> + template ()>* = nullptr> __device__ U operator()(U e) { return generic_round(e); } - template >* = nullptr> + template >* = nullptr> __device__ U operator()(U) { assert(false); // Should never get here. Just for compilation @@ -100,7 +100,7 @@ struct half_up_zero { template struct half_up_positive { T n; - template ()>* = nullptr> + template ()>* = nullptr> __device__ U operator()(U e) { T integer_part; @@ -108,7 +108,7 @@ struct half_up_positive { return integer_part + generic_round(fractional_part * n) / n; } - template >* = nullptr> + template >* = nullptr> __device__ U operator()(U) { assert(false); // Should never get here. 
Just for compilation @@ -119,13 +119,13 @@ struct half_up_positive { template struct half_up_negative { T n; - template ()>* = nullptr> + template ()>* = nullptr> __device__ U operator()(U e) { return generic_round(e / n) * n; } - template >* = nullptr> + template >* = nullptr> __device__ U operator()(U e) { auto const down = (e / n) * n; // result from rounding down @@ -136,13 +136,13 @@ struct half_up_negative { template struct half_even_zero { T n; // unused in the decimal_places = 0 case - template ()>* = nullptr> + template ()>* = nullptr> __device__ U operator()(U e) { return generic_round_half_even(e); } - template >* = nullptr> + template >* = nullptr> __device__ U operator()(U) { assert(false); // Should never get here. Just for compilation @@ -153,7 +153,7 @@ struct half_even_zero { template struct half_even_positive { T n; - template ()>* = nullptr> + template ()>* = nullptr> __device__ U operator()(U e) { T integer_part; @@ -161,7 +161,7 @@ struct half_even_positive { return integer_part + generic_round_half_even(fractional_part * n) / n; } - template >* = nullptr> + template >* = nullptr> __device__ U operator()(U) { assert(false); // Should never get here. Just for compilation @@ -172,13 +172,13 @@ struct half_even_positive { template struct half_even_negative { T n; - template ()>* = nullptr> + template ()>* = nullptr> __device__ U operator()(U e) { return generic_round_half_even(e / n) * n; } - template >* = nullptr> + template >* = nullptr> __device__ U operator()(U e) { auto const down_over_n = e / n; // use this to determine HALF_EVEN case @@ -205,7 +205,7 @@ struct half_even_fixed_point { template typename RoundFunctor, - typename std::enable_if_t()>* = nullptr> + std::enable_if_t()>* = nullptr> std::unique_ptr round_with(column_view const& input, int32_t decimal_places, rmm::cuda_stream_view stream, @@ -231,7 +231,7 @@ std::unique_ptr round_with(column_view const& input, template typename RoundFunctor, - typename std::enable_if_t()>* = nullptr> + std::enable_if_t()>* = nullptr> std::unique_ptr round_with(column_view const& input, int32_t decimal_places, rmm::cuda_stream_view stream, diff --git a/cpp/src/scalar/scalar_factories.cpp b/cpp/src/scalar/scalar_factories.cpp index c18b57d220f..3a2920f8f1a 100644 --- a/cpp/src/scalar/scalar_factories.cpp +++ b/cpp/src/scalar/scalar_factories.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -28,8 +28,8 @@ namespace cudf { namespace { struct scalar_construction_helper { template , - typename std::enable_if_t() and not is_fixed_point()>* = nullptr> + typename ScalarType = scalar_type_t, + std::enable_if_t() and not is_fixed_point()>* = nullptr> std::unique_ptr operator()(rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) const { @@ -39,8 +39,8 @@ struct scalar_construction_helper { } template , - typename std::enable_if_t()>* = nullptr> + typename ScalarType = scalar_type_t, + std::enable_if_t()>* = nullptr> std::unique_ptr operator()(rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) const { @@ -49,9 +49,7 @@ struct scalar_construction_helper { return std::unique_ptr(s); } - template ()>* = nullptr> + template ()>* = nullptr> std::unique_ptr operator()(Args... 
args) const { CUDF_FAIL("Invalid type."); @@ -124,14 +122,14 @@ namespace { struct default_scalar_functor { data_type type; - template ()>* = nullptr> + template ()>* = nullptr> std::unique_ptr operator()(rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { return make_fixed_width_scalar(data_type(type_to_id()), stream, mr); } - template ()>* = nullptr> + template ()>* = nullptr> std::unique_ptr operator()(rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { diff --git a/cpp/src/sort/sort.cu b/cpp/src/sort/sort.cu index 42b57bdb47a..5ce82cd3740 100644 --- a/cpp/src/sort/sort.cu +++ b/cpp/src/sort/sort.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -57,7 +57,7 @@ std::unique_ptr sort_by_key(table_view const& values, } struct inplace_column_sort_fn { - template ()>* = nullptr> + template ()>* = nullptr> void operator()(mutable_column_view& col, bool ascending, rmm::cuda_stream_view stream) const { CUDF_EXPECTS(!col.has_nulls(), "Nulls not supported for in-place sort"); @@ -68,7 +68,7 @@ struct inplace_column_sort_fn { } } - template ()>* = nullptr> + template ()>* = nullptr> void operator()(mutable_column_view&, bool, rmm::cuda_stream_view) const { CUDF_FAIL("Column type must be relationally comparable and fixed-width"); diff --git a/cpp/src/sort/sort_column.cu b/cpp/src/sort/sort_column.cu index 74c796e7962..7a4072cf8ae 100644 --- a/cpp/src/sort/sort_column.cu +++ b/cpp/src/sort/sort_column.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -43,7 +43,7 @@ struct column_sorted_order_fn { * @param ascending True if sort order is ascending * @param stream CUDA stream used for device memory operations and kernel launches */ - template ()>* = nullptr> + template ()>* = nullptr> void radix_sort(column_view const& input, mutable_column_view& indices, bool ascending, @@ -68,7 +68,7 @@ struct column_sorted_order_fn { thrust::greater()); } } - template ()>* = nullptr> + template ()>* = nullptr> void radix_sort(column_view const&, mutable_column_view&, bool, rmm::cuda_stream_view) { CUDF_FAIL("Only fixed-width types are suitable for faster sorting"); @@ -85,8 +85,7 @@ struct column_sorted_order_fn { * @param null_precedence How null rows are to be ordered * @param stream CUDA stream used for device memory operations and kernel launches */ - template ()>* = nullptr> + template ()>* = nullptr> void operator()(column_view const& input, mutable_column_view& indices, bool ascending, @@ -105,8 +104,7 @@ struct column_sorted_order_fn { } } - template ()>* = nullptr> + template ()>* = nullptr> void operator()(column_view const&, mutable_column_view&, bool, null_order, rmm::cuda_stream_view) { CUDF_FAIL("Column type must be relationally comparable"); diff --git a/cpp/src/sort/stable_sort_column.cu b/cpp/src/sort/stable_sort_column.cu index 49aecf52625..d79a691a580 100644 --- a/cpp/src/sort/stable_sort_column.cu +++ b/cpp/src/sort/stable_sort_column.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -28,7 +28,7 @@ struct column_stable_sorted_order_fn { * @param indices Output sorted indices * @param stream CUDA stream used for device memory operations and kernel launches */ - template ()>* = nullptr> + template ()>* = nullptr> void faster_stable_sort(column_view const& input, mutable_column_view& indices, rmm::cuda_stream_view stream) @@ -38,7 +38,7 @@ struct column_stable_sorted_order_fn { thrust::stable_sort_by_key( rmm::exec_policy(stream), d_col.begin(), d_col.end(), indices.begin()); } - template ()>* = nullptr> + template ()>* = nullptr> void faster_stable_sort(column_view const&, mutable_column_view&, rmm::cuda_stream_view) { CUDF_FAIL("Only fixed-width types are suitable for faster stable sorting"); @@ -55,8 +55,7 @@ struct column_stable_sorted_order_fn { * @param null_precedence How null rows are to be ordered * @param stream CUDA stream used for device memory operations and kernel launches */ - template ()>* = nullptr> + template ()>* = nullptr> void operator()(column_view const& input, mutable_column_view& indices, bool ascending, @@ -74,8 +73,7 @@ struct column_stable_sorted_order_fn { faster_stable_sort(input, indices, stream); } } - template ()>* = nullptr> + template ()>* = nullptr> void operator()(column_view const&, mutable_column_view&, bool, null_order, rmm::cuda_stream_view) { CUDF_FAIL("Column type must be relationally comparable"); diff --git a/cpp/src/strings/contains.cu b/cpp/src/strings/contains.cu index efdee65c1f6..23bc5cf2dfe 100644 --- a/cpp/src/strings/contains.cu +++ b/cpp/src/strings/contains.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,6 +14,10 @@ * limitations under the License. */ +#include +#include +#include + #include #include #include @@ -23,123 +27,90 @@ #include #include #include -#include -#include #include #include +#include + namespace cudf { namespace strings { namespace detail { + namespace { /** * @brief This functor handles both contains_re and match_re to minimize the number * of regex calls to find() to be inlined greatly reducing compile time. - * - * The stack is used to keep progress on evaluating the regex instructions on each string. - * So the size of the stack is in proportion to the number of instructions in the given regex - * pattern. - * - * There are three call types based on the number of regex instructions in the given pattern. - * Small to medium instruction lengths can use the stack effectively though smaller executes faster. - * Longer patterns require global memory. */ template struct contains_fn { reprog_device prog; - column_device_view d_strings; - bool bmatch{false}; // do not make this a template parameter to keep compile times down + column_device_view const d_strings; + bool const beginning_only; // do not make this a template parameter to keep compile times down __device__ bool operator()(size_type idx) { if (d_strings.is_null(idx)) return false; - string_view d_str = d_strings.element(idx); - int32_t begin = 0; - int32_t end = bmatch ? 1 // match only the beginning of the string; - : -1; // this handles empty strings too + auto const d_str = d_strings.element(idx); + int32_t begin = 0; + int32_t end = beginning_only ? 
1 // match only the beginning of the string; + : -1; // match anywhere in the string return static_cast(prog.find(idx, d_str, begin, end)); } }; -// -std::unique_ptr contains_util( - strings_column_view const& strings, - std::string const& pattern, - regex_flags const flags, - bool beginning_only = false, - rmm::cuda_stream_view stream = rmm::cuda_stream_default, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) -{ - auto strings_count = strings.size(); - auto strings_column = column_device_view::create(strings.parent(), stream); - auto d_column = *strings_column; - - // compile regex into device object - auto prog = - reprog_device::create(pattern, flags, get_character_flags_table(), strings_count, stream); - auto d_prog = *prog; - - // create the output column - auto results = make_numeric_column(data_type{type_id::BOOL8}, - strings_count, - cudf::detail::copy_bitmask(strings.parent(), stream, mr), - strings.null_count(), - stream, - mr); - auto d_results = results->mutable_view().data(); +struct contains_dispatch_fn { + reprog_device d_prog; + bool const beginning_only; - // fill the output column - int regex_insts = d_prog.insts_counts(); - if (regex_insts <= RX_SMALL_INSTS) - thrust::transform(rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(strings_count), - d_results, - contains_fn{d_prog, d_column, beginning_only}); - else if (regex_insts <= RX_MEDIUM_INSTS) - thrust::transform(rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(strings_count), - d_results, - contains_fn{d_prog, d_column, beginning_only}); - else if (regex_insts <= RX_LARGE_INSTS) - thrust::transform(rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(strings_count), - d_results, - contains_fn{d_prog, d_column, beginning_only}); - else + template + std::unique_ptr operator()(strings_column_view const& input, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) + { + auto results = make_numeric_column(data_type{type_id::BOOL8}, + input.size(), + cudf::detail::copy_bitmask(input.parent(), stream, mr), + input.null_count(), + stream, + mr); + + auto const d_strings = column_device_view::create(input.parent(), stream); thrust::transform(rmm::exec_policy(stream), thrust::make_counting_iterator(0), - thrust::make_counting_iterator(strings_count), - d_results, - contains_fn{d_prog, d_column, beginning_only}); - - results->set_null_count(strings.null_count()); - return results; -} + thrust::make_counting_iterator(input.size()), + results->mutable_view().data(), + contains_fn{d_prog, *d_strings, beginning_only}); + return results; + } +}; } // namespace std::unique_ptr contains_re( - strings_column_view const& strings, + strings_column_view const& input, std::string const& pattern, regex_flags const flags, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) { - return contains_util(strings, pattern, flags, false, stream, mr); + auto d_prog = + reprog_device::create(pattern, flags, get_character_flags_table(), input.size(), stream); + + return regex_dispatcher(*d_prog, contains_dispatch_fn{*d_prog, false}, input, stream, mr); } std::unique_ptr matches_re( - strings_column_view const& strings, + strings_column_view const& input, std::string const& pattern, regex_flags const flags, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) { - 
return contains_util(strings, pattern, flags, true, stream, mr); + auto d_prog = + reprog_device::create(pattern, flags, get_character_flags_table(), input.size(), stream); + + return regex_dispatcher(*d_prog, contains_dispatch_fn{*d_prog, true}, input, stream, mr); } } // namespace detail @@ -172,12 +143,12 @@ namespace { template struct count_fn { reprog_device prog; - column_device_view d_strings; + column_device_view const d_strings; __device__ int32_t operator()(unsigned int idx) { if (d_strings.is_null(idx)) return 0; - string_view d_str = d_strings.element(idx); + auto const d_str = d_strings.element(idx); auto const nchars = d_str.length(); int32_t find_count = 0; int32_t begin = 0; @@ -191,62 +162,45 @@ struct count_fn { } }; +struct count_dispatch_fn { + reprog_device d_prog; + + template + std::unique_ptr operator()(strings_column_view const& input, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) + { + auto results = make_numeric_column(data_type{type_id::INT32}, + input.size(), + cudf::detail::copy_bitmask(input.parent(), stream, mr), + input.null_count(), + stream, + mr); + + auto const d_strings = column_device_view::create(input.parent(), stream); + thrust::transform(rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(input.size()), + results->mutable_view().data(), + count_fn{d_prog, *d_strings}); + return results; + } +}; + } // namespace std::unique_ptr count_re( - strings_column_view const& strings, + strings_column_view const& input, std::string const& pattern, regex_flags const flags, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) { - auto strings_count = strings.size(); - auto strings_column = column_device_view::create(strings.parent(), stream); - auto d_column = *strings_column; - // compile regex into device object - auto prog = - reprog_device::create(pattern, flags, get_character_flags_table(), strings_count, stream); - auto d_prog = *prog; - - // create the output column - auto results = make_numeric_column(data_type{type_id::INT32}, - strings_count, - cudf::detail::copy_bitmask(strings.parent(), stream, mr), - strings.null_count(), - stream, - mr); - auto d_results = results->mutable_view().data(); - - // fill the output column - int regex_insts = d_prog.insts_counts(); - if (regex_insts <= RX_SMALL_INSTS) - thrust::transform(rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(strings_count), - d_results, - count_fn{d_prog, d_column}); - else if (regex_insts <= RX_MEDIUM_INSTS) - thrust::transform(rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(strings_count), - d_results, - count_fn{d_prog, d_column}); - else if (regex_insts <= RX_LARGE_INSTS) - thrust::transform(rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(strings_count), - d_results, - count_fn{d_prog, d_column}); - else - thrust::transform(rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(strings_count), - d_results, - count_fn{d_prog, d_column}); + auto d_prog = + reprog_device::create(pattern, flags, get_character_flags_table(), input.size(), stream); - results->set_null_count(strings.null_count()); - return results; + return regex_dispatcher(*d_prog, count_dispatch_fn{*d_prog}, input, stream, mr); } } // namespace detail diff --git a/cpp/src/strings/count_matches.cu 
b/cpp/src/strings/count_matches.cu index d0a6825666b..ae996cafd2c 100644 --- a/cpp/src/strings/count_matches.cu +++ b/cpp/src/strings/count_matches.cu @@ -15,6 +15,7 @@ */ #include +#include #include #include @@ -54,6 +55,27 @@ struct count_matches_fn { return count; } }; + +struct count_dispatch_fn { + reprog_device d_prog; + + template + std::unique_ptr operator()(column_device_view const& d_strings, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) + { + auto results = make_numeric_column( + data_type{type_id::INT32}, d_strings.size() + 1, mask_state::UNALLOCATED, stream, mr); + + thrust::transform(rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(d_strings.size()), + results->mutable_view().data(), + count_matches_fn{d_strings, d_prog}); + return results; + } +}; + } // namespace /** @@ -71,31 +93,7 @@ std::unique_ptr count_matches(column_device_view const& d_strings, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - // Create output column - auto counts = make_numeric_column( - data_type{type_id::INT32}, d_strings.size() + 1, mask_state::UNALLOCATED, stream, mr); - auto d_counts = counts->mutable_view().data(); - - auto begin = thrust::make_counting_iterator(0); - auto end = thrust::make_counting_iterator(d_strings.size()); - - // Count matches - auto const regex_insts = d_prog.insts_counts(); - if (regex_insts <= RX_SMALL_INSTS) { - count_matches_fn fn{d_strings, d_prog}; - thrust::transform(rmm::exec_policy(stream), begin, end, d_counts, fn); - } else if (regex_insts <= RX_MEDIUM_INSTS) { - count_matches_fn fn{d_strings, d_prog}; - thrust::transform(rmm::exec_policy(stream), begin, end, d_counts, fn); - } else if (regex_insts <= RX_LARGE_INSTS) { - count_matches_fn fn{d_strings, d_prog}; - thrust::transform(rmm::exec_policy(stream), begin, end, d_counts, fn); - } else { - count_matches_fn fn{d_strings, d_prog}; - thrust::transform(rmm::exec_policy(stream), begin, end, d_counts, fn); - } - - return counts; + return regex_dispatcher(d_prog, count_dispatch_fn{d_prog}, d_strings, stream, mr); } } // namespace detail diff --git a/cpp/src/strings/extract/extract.cu b/cpp/src/strings/extract/extract.cu index a67af9442f0..7394cdac6bb 100644 --- a/cpp/src/strings/extract/extract.cu +++ b/cpp/src/strings/extract/extract.cu @@ -14,6 +14,7 @@ * limitations under the License. */ +#include #include #include @@ -77,53 +78,44 @@ struct extract_fn { thrust::fill(thrust::seq, d_output.begin(), d_output.end(), string_index_pair{nullptr, 0}); } }; + +struct extract_dispatch_fn { + reprog_device d_prog; + + template + void operator()(column_device_view const& d_strings, + cudf::detail::device_2dspan& d_indices, + rmm::cuda_stream_view stream) + { + thrust::for_each_n(rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + d_strings.size(), + extract_fn{d_prog, d_strings, d_indices}); + } +}; } // namespace // std::unique_ptr
extract( - strings_column_view const& strings, + strings_column_view const& input, std::string const& pattern, regex_flags const flags, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) { - auto const strings_count = strings.size(); - auto const strings_column = column_device_view::create(strings.parent(), stream); - auto const d_strings = *strings_column; - // compile regex into device object - auto prog = - reprog_device::create(pattern, flags, get_character_flags_table(), strings_count, stream); - auto d_prog = *prog; - // extract should include groups - auto const groups = d_prog.group_counts(); + auto d_prog = + reprog_device::create(pattern, flags, get_character_flags_table(), input.size(), stream); + + auto const groups = d_prog->group_counts(); CUDF_EXPECTS(groups > 0, "Group indicators not found in regex pattern"); - rmm::device_uvector indices(strings_count * groups, stream); - cudf::detail::device_2dspan d_indices(indices.data(), strings_count, groups); + auto indices = rmm::device_uvector(input.size() * groups, stream); + auto d_indices = + cudf::detail::device_2dspan(indices.data(), input.size(), groups); - auto const regex_insts = d_prog.insts_counts(); - if (regex_insts <= RX_SMALL_INSTS) { - thrust::for_each_n(rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - strings_count, - extract_fn{d_prog, d_strings, d_indices}); - } else if (regex_insts <= RX_MEDIUM_INSTS) { - thrust::for_each_n(rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - strings_count, - extract_fn{d_prog, d_strings, d_indices}); - } else if (regex_insts <= RX_LARGE_INSTS) { - thrust::for_each_n(rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - strings_count, - extract_fn{d_prog, d_strings, d_indices}); - } else { - thrust::for_each_n(rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - strings_count, - extract_fn{d_prog, d_strings, d_indices}); - } + auto const d_strings = column_device_view::create(input.parent(), stream); + regex_dispatcher(*d_prog, extract_dispatch_fn{*d_prog}, *d_strings, d_indices, stream); // build a result column for each group std::vector> results(groups); @@ -135,7 +127,7 @@ std::unique_ptr
extract( 0, [column_index, groups] __device__(size_type idx) { return (idx * groups) + column_index; })); - return make_strings_column(indices_itr, indices_itr + strings_count, stream, mr); + return make_strings_column(indices_itr, indices_itr + input.size(), stream, mr); }; std::transform(thrust::make_counting_iterator(0), diff --git a/cpp/src/strings/extract/extract_all.cu b/cpp/src/strings/extract/extract_all.cu index e27dccb9338..1f1474c777b 100644 --- a/cpp/src/strings/extract/extract_all.cu +++ b/cpp/src/strings/extract/extract_all.cu @@ -15,6 +15,7 @@ */ #include +#include #include #include @@ -86,6 +87,28 @@ struct extract_fn { } } }; + +struct extract_dispatch_fn { + reprog_device d_prog; + + template + std::unique_ptr operator()(column_device_view const& d_strings, + size_type total_groups, + offset_type const* d_offsets, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) + { + rmm::device_uvector indices(total_groups, stream); + + thrust::for_each_n(rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + d_strings.size(), + extract_fn{d_strings, d_prog, d_offsets, indices.data()}); + + return make_strings_column(indices.begin(), indices.end(), stream, mr); + } +}; + } // namespace /** @@ -94,14 +117,14 @@ struct extract_fn { * @param stream CUDA stream used for device memory operations and kernel launches. */ std::unique_ptr extract_all_record( - strings_column_view const& strings, + strings_column_view const& input, std::string const& pattern, regex_flags const flags, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) { - auto const strings_count = strings.size(); - auto const d_strings = column_device_view::create(strings.parent(), stream); + auto const strings_count = input.size(); + auto const d_strings = column_device_view::create(input.parent(), stream); // Compile regex into device object. auto d_prog = @@ -143,29 +166,8 @@ std::unique_ptr extract_all_record( auto const total_groups = cudf::detail::get_value(offsets->view(), strings_count, stream); - // Create an indices vector with the total number of groups that will be extracted. - rmm::device_uvector indices(total_groups, stream); - auto d_indices = indices.data(); - auto begin = thrust::make_counting_iterator(0); - - // Call the extract functor to fill in the indices vector. - auto const regex_insts = d_prog->insts_counts(); - if (regex_insts <= RX_SMALL_INSTS) { - extract_fn fn{*d_strings, *d_prog, d_offsets, d_indices}; - thrust::for_each_n(rmm::exec_policy(stream), begin, strings_count, fn); - } else if (regex_insts <= RX_MEDIUM_INSTS) { - extract_fn fn{*d_strings, *d_prog, d_offsets, d_indices}; - thrust::for_each_n(rmm::exec_policy(stream), begin, strings_count, fn); - } else if (regex_insts <= RX_LARGE_INSTS) { - extract_fn fn{*d_strings, *d_prog, d_offsets, d_indices}; - thrust::for_each_n(rmm::exec_policy(stream), begin, strings_count, fn); - } else { - extract_fn fn{*d_strings, *d_prog, d_offsets, d_indices}; - thrust::for_each_n(rmm::exec_policy(stream), begin, strings_count, fn); - } - - // Build the child strings column from the indices. - auto strings_output = make_strings_column(indices.begin(), indices.end(), stream, mr); + auto strings_output = regex_dispatcher( + *d_prog, extract_dispatch_fn{*d_prog}, *d_strings, total_groups, d_offsets, stream, mr); // Build the lists column from the offsets and the strings. 
return make_lists_column(strings_count, diff --git a/cpp/src/strings/regex/dispatcher.hpp b/cpp/src/strings/regex/dispatcher.hpp new file mode 100644 index 00000000000..9ff51d1c979 --- /dev/null +++ b/cpp/src/strings/regex/dispatcher.hpp @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +namespace cudf { +namespace strings { +namespace detail { + +/** + * The stack is used to keep progress (state) on evaluating the regex instructions on each string. + * So the size of the stack is in proportion to the number of instructions in the given regex + * pattern. + * + * There are four call types based on the number of regex instructions in the given pattern. + * Small, medium, and large instruction counts can use the stack effectively. + * Smaller stack sizes execute faster. + * + * Patterns with instruction counts bigger than large use global memory rather than the stack + * for managing the evaluation state data. + * + * @tparam Functor The functor to invoke with stack size templated value. + * @tparam Ts Parameter types for the functor call. + */ +template +constexpr decltype(auto) regex_dispatcher(reprog_device d_prog, Functor f, Ts&&... args) +{ + auto const num_regex_insts = d_prog.insts_counts(); + if (num_regex_insts <= RX_SMALL_INSTS) { + return f.template operator()(std::forward(args)...); + } + if (num_regex_insts <= RX_MEDIUM_INSTS) { + return f.template operator()(std::forward(args)...); + } + if (num_regex_insts <= RX_LARGE_INSTS) { + return f.template operator()(std::forward(args)...); + } + + return f.template operator()(std::forward(args)...); +} + +} // namespace detail +} // namespace strings +} // namespace cudf diff --git a/cpp/src/strings/replace/backref_re.cu b/cpp/src/strings/replace/backref_re.cu index ff86d7aa552..27e0bd4fac9 100644 --- a/cpp/src/strings/replace/backref_re.cu +++ b/cpp/src/strings/replace/backref_re.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
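// [editor's note — illustrative sketch, not part of this patch] Self-contained mock of the
// dispatch pattern the new dispatcher.hpp centralizes: the caller packages its work in a
// functor whose operator() is templated on a stack-size constant, and the dispatcher picks
// the instantiation from the compiled pattern's instruction count, exactly as the new
// contains_dispatch_fn / count_dispatch_fn / extract_dispatch_fn do in this patch. The
// thresholds, stack sizes, and mock_prog type below are arbitrary stand-ins, not the real
// cudf RX_* constants or reprog_device.
#include <iostream>
#include <utility>

constexpr int small_insts  = 16;  // stand-ins for RX_SMALL_INSTS / RX_MEDIUM_INSTS / RX_LARGE_INSTS
constexpr int medium_insts = 64;
constexpr int large_insts  = 256;

constexpr int stack_small  = 128;  // stand-ins for the RX_STACK_* sizes
constexpr int stack_medium = 1024;
constexpr int stack_large  = 8192;
constexpr int stack_any    = 16384;

struct mock_prog {
  int num_insts;
  int insts_counts() const { return num_insts; }
};

template <typename Functor, typename... Ts>
constexpr decltype(auto) regex_dispatcher_mock(mock_prog d_prog, Functor f, Ts&&... args)
{
  auto const n = d_prog.insts_counts();
  if (n <= small_insts) { return f.template operator()<stack_small>(std::forward<Ts>(args)...); }
  if (n <= medium_insts) { return f.template operator()<stack_medium>(std::forward<Ts>(args)...); }
  if (n <= large_insts) { return f.template operator()<stack_large>(std::forward<Ts>(args)...); }
  return f.template operator()<stack_any>(std::forward<Ts>(args)...);
}

// A caller's dispatch functor: a real one would launch a kernel or thrust::transform with a
// device functor templated on stack_size; this one only reports which tier was selected.
struct print_dispatch_fn {
  template <int stack_size>
  void operator()(char const* label)
  {
    std::cout << label << ": instantiated with stack_size=" << stack_size << '\n';
  }
};

int main()
{
  regex_dispatcher_mock(mock_prog{10}, print_dispatch_fn{}, "short pattern");
  regex_dispatcher_mock(mock_prog{500}, print_dispatch_fn{}, "long pattern");
}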
@@ -16,6 +16,7 @@ #include "backref_re.cuh" +#include #include #include @@ -95,27 +96,54 @@ std::pair> parse_backrefs(std::string con return {rtn, backrefs}; } +template +struct replace_dispatch_fn { + reprog_device d_prog; + + template + std::unique_ptr operator()(strings_column_view const& input, + string_view const& d_repl_template, + Iterator backrefs_begin, + Iterator backrefs_end, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) + { + auto const d_strings = column_device_view::create(input.parent(), stream); + + auto children = make_strings_children( + backrefs_fn{ + *d_strings, d_prog, d_repl_template, backrefs_begin, backrefs_end}, + input.size(), + stream, + mr); + + return make_strings_column(input.size(), + std::move(children.first), + std::move(children.second), + input.null_count(), + cudf::detail::copy_bitmask(input.parent(), stream, mr)); + } +}; + } // namespace // std::unique_ptr replace_with_backrefs( - strings_column_view const& strings, + strings_column_view const& input, std::string const& pattern, std::string const& replacement, regex_flags const flags, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) { - if (strings.is_empty()) return make_empty_column(type_id::STRING); + if (input.is_empty()) return make_empty_column(type_id::STRING); CUDF_EXPECTS(!pattern.empty(), "Parameter pattern must not be empty"); CUDF_EXPECTS(!replacement.empty(), "Parameter replacement must not be empty"); - auto d_strings = column_device_view::create(strings.parent(), stream); // compile regex into device object auto d_prog = - reprog_device::create(pattern, flags, get_character_flags_table(), strings.size(), stream); - auto const regex_insts = d_prog->insts_counts(); + reprog_device::create(pattern, flags, get_character_flags_table(), input.size(), stream); // parse the repl string for back-ref indicators auto const parse_result = parse_backrefs(replacement); @@ -125,45 +153,14 @@ std::unique_ptr replace_with_backrefs( string_view const d_repl_template = repl_scalar.value(); using BackRefIterator = decltype(backrefs.begin()); - - // create child columns - auto [offsets, chars] = [&] { - if (regex_insts <= RX_SMALL_INSTS) { - return make_strings_children( - backrefs_fn{ - *d_strings, *d_prog, d_repl_template, backrefs.begin(), backrefs.end()}, - strings.size(), - stream, - mr); - } else if (regex_insts <= RX_MEDIUM_INSTS) { - return make_strings_children( - backrefs_fn{ - *d_strings, *d_prog, d_repl_template, backrefs.begin(), backrefs.end()}, - strings.size(), - stream, - mr); - } else if (regex_insts <= RX_LARGE_INSTS) { - return make_strings_children( - backrefs_fn{ - *d_strings, *d_prog, d_repl_template, backrefs.begin(), backrefs.end()}, - strings.size(), - stream, - mr); - } else { - return make_strings_children( - backrefs_fn{ - *d_strings, *d_prog, d_repl_template, backrefs.begin(), backrefs.end()}, - strings.size(), - stream, - mr); - } - }(); - - return make_strings_column(strings.size(), - std::move(offsets), - std::move(chars), - strings.null_count(), - cudf::detail::copy_bitmask(strings.parent(), stream, mr)); + return regex_dispatcher(*d_prog, + replace_dispatch_fn{*d_prog}, + input, + d_repl_template, + backrefs.begin(), + backrefs.end(), + stream, + mr); } } // namespace detail diff --git a/cpp/src/strings/replace/multi_re.cu b/cpp/src/strings/replace/multi_re.cu index 2b5380b76dd..22f6d2cba39 100644 --- a/cpp/src/strings/replace/multi_re.cu +++ b/cpp/src/strings/replace/multi_re.cu @@ -1,5 
+1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,6 +14,7 @@ * limitations under the License. */ +#include #include #include @@ -30,6 +31,8 @@ #include +#include + namespace cudf { namespace strings { namespace detail { @@ -40,16 +43,6 @@ using found_range = thrust::pair; /** * @brief This functor handles replacing strings by applying the compiled regex patterns * and inserting the corresponding new string within the matched range of characters. - * - * The logic includes computing the size of each string and also writing the output. - * - * The stack is used to keep progress on evaluating the regex instructions on each string. - * So the size of the stack is in proportion to the number of instructions in the given regex - * pattern. - * - * There are three call types based on the number of regex instructions in the given pattern. - * Small to medium instruction lengths can use the stack effectively though smaller executes faster. - * Longer patterns require global memory. Shorter patterns are common in data cleaning. */ template struct replace_multi_regex_fn { @@ -127,69 +120,76 @@ struct replace_multi_regex_fn { } }; +struct replace_dispatch_fn { + template + std::unique_ptr operator()(strings_column_view const& input, + device_span d_progs, + strings_column_view const& replacements, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) + { + auto const d_strings = column_device_view::create(input.parent(), stream); + auto const d_repls = column_device_view::create(replacements.parent(), stream); + + auto found_ranges = rmm::device_uvector(d_progs.size() * input.size(), stream); + + auto children = make_strings_children( + replace_multi_regex_fn{*d_strings, d_progs, found_ranges.data(), *d_repls}, + input.size(), + stream, + mr); + + return make_strings_column(input.size(), + std::move(children.first), + std::move(children.second), + input.null_count(), + cudf::detail::copy_bitmask(input.parent(), stream, mr)); + } +}; + } // namespace std::unique_ptr replace_re( - strings_column_view const& strings, + strings_column_view const& input, std::vector const& patterns, strings_column_view const& replacements, regex_flags const flags, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) { - auto strings_count = strings.size(); - if (strings_count == 0) return make_empty_column(type_id::STRING); - if (patterns.empty()) // no patterns; just return a copy - return std::make_unique(strings.parent(), stream, mr); + if (input.is_empty()) { return make_empty_column(type_id::STRING); } + if (patterns.empty()) { // if no patterns; just return a copy + return std::make_unique(input.parent(), stream, mr); + } CUDF_EXPECTS(!replacements.has_nulls(), "Parameter replacements must not have any nulls"); - auto d_strings = column_device_view::create(strings.parent(), stream); - auto d_repls = column_device_view::create(replacements.parent(), stream); - auto d_char_table = get_character_flags_table(); - // compile regexes into device objects - size_type regex_insts = 0; - std::vector>> h_progs; - std::vector progs; - for (auto itr = patterns.begin(); itr != patterns.end(); ++itr) { - auto prog = reprog_device::create(*itr, flags, d_char_table, strings_count, stream); - regex_insts = std::max(regex_insts, prog->insts_counts()); - 
progs.push_back(*prog); - h_progs.emplace_back(std::move(prog)); - } + auto const d_char_table = get_character_flags_table(); + auto h_progs = std::vector>>( + patterns.size()); + std::transform(patterns.begin(), + patterns.end(), + h_progs.begin(), + [flags, d_char_table, input, stream](auto const& ptn) { + return reprog_device::create(ptn, flags, d_char_table, input.size(), stream); + }); + + // get the longest regex for the dispatcher + auto const max_prog = + std::max_element(h_progs.begin(), h_progs.end(), [](auto const& lhs, auto const& rhs) { + return lhs->insts_counts() < rhs->insts_counts(); + }); // copy all the reprog_device instances to a device memory array + std::vector progs; + std::transform(h_progs.begin(), h_progs.end(), std::back_inserter(progs), [](auto const& d_prog) { + return *d_prog; + }); auto d_progs = cudf::detail::make_device_uvector_async(progs, stream); - // create working buffer for ranges pairs - rmm::device_uvector found_ranges(patterns.size() * strings_count, stream); - auto d_found_ranges = found_ranges.data(); - - // create child columns - auto children = [&] { - // Each invocation is predicated on the stack size which is dependent on the number of regex - // instructions - if (regex_insts <= RX_SMALL_INSTS) { - replace_multi_regex_fn fn{*d_strings, d_progs, d_found_ranges, *d_repls}; - return make_strings_children(fn, strings_count, stream, mr); - } else if (regex_insts <= RX_MEDIUM_INSTS) { - replace_multi_regex_fn fn{*d_strings, d_progs, d_found_ranges, *d_repls}; - return make_strings_children(fn, strings_count, stream, mr); - } else if (regex_insts <= RX_LARGE_INSTS) { - replace_multi_regex_fn fn{*d_strings, d_progs, d_found_ranges, *d_repls}; - return make_strings_children(fn, strings_count, stream, mr); - } else { - replace_multi_regex_fn fn{*d_strings, d_progs, d_found_ranges, *d_repls}; - return make_strings_children(fn, strings_count, stream, mr); - } - }(); - - return make_strings_column(strings_count, - std::move(children.first), - std::move(children.second), - strings.null_count(), - cudf::detail::copy_bitmask(strings.parent(), stream, mr)); + return regex_dispatcher( + **max_prog, replace_dispatch_fn{}, input, d_progs, replacements, stream, mr); } } // namespace detail diff --git a/cpp/src/strings/replace/replace_re.cu b/cpp/src/strings/replace/replace_re.cu index 2c594bb86a8..d42359deeac 100644 --- a/cpp/src/strings/replace/replace_re.cu +++ b/cpp/src/strings/replace/replace_re.cu @@ -14,6 +14,7 @@ * limitations under the License. */ +#include #include #include @@ -36,16 +37,6 @@ namespace { /** * @brief This functor handles replacing strings by applying the compiled regex pattern * and inserting the new string within the matched range of characters. - * - * The logic includes computing the size of each string and also writing the output. - * - * The stack is used to keep progress on evaluating the regex instructions on each string. - * So the size of the stack is in proportion to the number of instructions in the given regex - * pattern. - * - * There are three call types based on the number of regex instructions in the given pattern. - * Small to medium instruction lengths can use the stack effectively though smaller executes faster. - * Longer patterns require global memory. Shorter patterns are common in data cleaning. 
*/ template struct replace_regex_fn { @@ -108,11 +99,37 @@ struct replace_regex_fn { } }; +struct replace_dispatch_fn { + reprog_device d_prog; + + template + std::unique_ptr operator()(strings_column_view const& input, + string_view const& d_replacement, + size_type max_replace_count, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) + { + auto const d_strings = column_device_view::create(input.parent(), stream); + + auto children = make_strings_children( + replace_regex_fn{*d_strings, d_prog, d_replacement, max_replace_count}, + input.size(), + stream, + mr); + + return make_strings_column(input.size(), + std::move(children.first), + std::move(children.second), + input.null_count(), + cudf::detail::copy_bitmask(input.parent(), stream, mr)); + } +}; + } // namespace // std::unique_ptr replace_re( - strings_column_view const& strings, + strings_column_view const& input, std::string const& pattern, string_scalar const& replacement, std::optional max_replace_count, @@ -120,49 +137,19 @@ std::unique_ptr replace_re( rmm::cuda_stream_view stream = rmm::cuda_stream_default, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) { - auto strings_count = strings.size(); - if (strings_count == 0) return make_empty_column(type_id::STRING); + if (input.is_empty()) return make_empty_column(type_id::STRING); CUDF_EXPECTS(replacement.is_valid(stream), "Parameter replacement must be valid"); string_view d_repl(replacement.data(), replacement.size()); - auto strings_column = column_device_view::create(strings.parent(), stream); - auto d_strings = *strings_column; // compile regex into device object - auto prog = - reprog_device::create(pattern, flags, get_character_flags_table(), strings_count, stream); - auto d_prog = *prog; - auto const regex_insts = d_prog.insts_counts(); - - // copy null mask - auto null_mask = cudf::detail::copy_bitmask(strings.parent(), stream, mr); - auto const null_count = strings.null_count(); - auto const maxrepl = max_replace_count.value_or(-1); - - // create child columns - auto children = [&] { - // Each invocation is predicated on the stack size which is dependent on the number of regex - // instructions - if (regex_insts <= RX_SMALL_INSTS) { - replace_regex_fn fn{d_strings, d_prog, d_repl, maxrepl}; - return make_strings_children(fn, strings_count, stream, mr); - } else if (regex_insts <= RX_MEDIUM_INSTS) { - replace_regex_fn fn{d_strings, d_prog, d_repl, maxrepl}; - return make_strings_children(fn, strings_count, stream, mr); - } else if (regex_insts <= RX_LARGE_INSTS) { - replace_regex_fn fn{d_strings, d_prog, d_repl, maxrepl}; - return make_strings_children(fn, strings_count, stream, mr); - } else { - replace_regex_fn fn{d_strings, d_prog, d_repl, maxrepl}; - return make_strings_children(fn, strings_count, stream, mr); - } - }(); + auto d_prog = + reprog_device::create(pattern, flags, get_character_flags_table(), input.size(), stream); + + auto const maxrepl = max_replace_count.value_or(-1); - return make_strings_column(strings_count, - std::move(children.first), - std::move(children.second), - null_count, - std::move(null_mask)); + return regex_dispatcher( + *d_prog, replace_dispatch_fn{*d_prog}, input, d_repl, maxrepl, stream, mr); } } // namespace detail diff --git a/cpp/src/strings/search/findall.cu b/cpp/src/strings/search/findall.cu index 810e44cc27d..201556033ad 100644 --- a/cpp/src/strings/search/findall.cu +++ b/cpp/src/strings/search/findall.cu @@ -14,6 +14,11 @@ * limitations under the License. 
*/ +#include +#include +#include +#include + #include #include #include @@ -24,19 +29,16 @@ #include #include -#include -#include - #include #include -#include +#include +#include namespace cudf { namespace strings { namespace detail { using string_index_pair = thrust::pair; -using findall_result = thrust::pair; namespace { /** @@ -47,27 +49,20 @@ template struct findall_fn { column_device_view const d_strings; reprog_device prog; - size_type column_index; + size_type const column_index; size_type const* d_counts; - findall_fn(column_device_view const& d_strings, - reprog_device& prog, - size_type column_index = -1, - size_type const* d_counts = nullptr) - : d_strings(d_strings), prog(prog), column_index(column_index), d_counts(d_counts) + __device__ string_index_pair operator()(size_type idx) { - } + if (d_strings.is_null(idx) || (column_index >= d_counts[idx])) { + return string_index_pair{nullptr, 0}; + } + + auto const d_str = d_strings.element(idx); + auto const nchars = d_str.length(); + int32_t spos = 0; + auto epos = static_cast(nchars); - // this will count columns as well as locate a specific string for a column - __device__ findall_result findall(size_type idx) - { - string_index_pair result{nullptr, 0}; - if (d_strings.is_null(idx) || (d_counts && (column_index >= d_counts[idx]))) - return findall_result{0, result}; - string_view d_str = d_strings.element(idx); - auto const nchars = d_str.length(); - int32_t spos = 0; - auto epos = static_cast(nchars); size_type column_count = 0; while (spos <= nchars) { if (prog.find(idx, d_str, spos, epos) <= 0) break; // no more matches found @@ -76,36 +71,40 @@ struct findall_fn { epos = static_cast(nchars); ++column_count; } - if (spos <= epos) { - spos = d_str.byte_offset(spos); // convert - epos = d_str.byte_offset(epos); // to bytes - result = string_index_pair{d_str.data() + spos, (epos - spos)}; - } - // return the strings location and the column count - return findall_result{column_count, result}; - } - __device__ string_index_pair operator()(size_type idx) - { - // this one only cares about the string - return findall(idx).second; + auto const result = [&] { + if (spos > epos) { return string_index_pair{nullptr, 0}; } + // convert character positions to byte positions + spos = d_str.byte_offset(spos); + epos = d_str.byte_offset(epos); + return string_index_pair{d_str.data() + spos, (epos - spos)}; + }(); + + return result; } }; -template -struct findall_count_fn : public findall_fn { - findall_count_fn(column_device_view const& strings, reprog_device& prog) - : findall_fn{strings, prog} - { - } +struct findall_dispatch_fn { + reprog_device d_prog; - __device__ size_type operator()(size_type idx) + template + std::unique_ptr operator()(column_device_view const& d_strings, + size_type column_index, + size_type const* d_find_counts, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { - // this one only cares about the column count - return findall_fn::findall(idx).first; + rmm::device_uvector indices(d_strings.size(), stream); + + thrust::transform(rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(d_strings.size()), + indices.begin(), + findall_fn{d_strings, d_prog, column_index, d_find_counts}); + + return make_strings_column(indices.begin(), indices.end(), stream, mr); } }; - } // namespace // @@ -124,38 +123,15 @@ std::unique_ptr
findall( reprog_device::create(pattern, flags, get_character_flags_table(), strings_count, stream); auto const regex_insts = d_prog->insts_counts(); - rmm::device_uvector find_counts(strings_count, stream); - auto d_find_counts = find_counts.data(); - - if (regex_insts <= RX_SMALL_INSTS) - thrust::transform(rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(strings_count), - d_find_counts, - findall_count_fn{*d_strings, *d_prog}); - else if (regex_insts <= RX_MEDIUM_INSTS) - thrust::transform(rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(strings_count), - d_find_counts, - findall_count_fn{*d_strings, *d_prog}); - else if (regex_insts <= RX_LARGE_INSTS) - thrust::transform(rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(strings_count), - d_find_counts, - findall_count_fn{*d_strings, *d_prog}); - else - thrust::transform(rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(strings_count), - d_find_counts, - findall_count_fn{*d_strings, *d_prog}); + auto find_counts = + count_matches(*d_strings, *d_prog, stream, rmm::mr::get_current_device_resource()); + auto d_find_counts = find_counts->mutable_view().data(); std::vector> results; size_type const columns = thrust::reduce( - rmm::exec_policy(stream), find_counts.begin(), find_counts.end(), 0, thrust::maximum{}); + rmm::exec_policy(stream), d_find_counts, d_find_counts + strings_count, 0, thrust::maximum{}); + // boundary case: if no columns, return all nulls column (issue #119) if (columns == 0) results.emplace_back(std::make_unique( @@ -166,39 +142,10 @@ std::unique_ptr
findall( strings_count)); for (int32_t column_index = 0; column_index < columns; ++column_index) { - rmm::device_uvector indices(strings_count, stream); - - if (regex_insts <= RX_SMALL_INSTS) - thrust::transform( - rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(strings_count), - indices.begin(), - findall_fn{*d_strings, *d_prog, column_index, d_find_counts}); - else if (regex_insts <= RX_MEDIUM_INSTS) - thrust::transform( - rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(strings_count), - indices.begin(), - findall_fn{*d_strings, *d_prog, column_index, d_find_counts}); - else if (regex_insts <= RX_LARGE_INSTS) - thrust::transform( - rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(strings_count), - indices.begin(), - findall_fn{*d_strings, *d_prog, column_index, d_find_counts}); - else - thrust::transform(rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(strings_count), - indices.begin(), - findall_fn{*d_strings, *d_prog, column_index, d_find_counts}); - - // - results.emplace_back(make_strings_column(indices.begin(), indices.end(), stream, mr)); + results.emplace_back(regex_dispatcher( + *d_prog, findall_dispatch_fn{*d_prog}, *d_strings, column_index, d_find_counts, stream, mr)); } + return std::make_unique
(std::move(results)); } diff --git a/cpp/src/strings/search/findall_record.cu b/cpp/src/strings/search/findall_record.cu index c93eb0c17db..95e347a7c35 100644 --- a/cpp/src/strings/search/findall_record.cu +++ b/cpp/src/strings/search/findall_record.cu @@ -15,6 +15,9 @@ */ #include +#include +#include +#include #include #include @@ -26,9 +29,6 @@ #include #include -#include -#include - #include #include @@ -75,6 +75,27 @@ struct findall_fn { } }; +struct findall_dispatch_fn { + reprog_device d_prog; + + template + std::unique_ptr operator()(column_device_view const& d_strings, + size_type total_matches, + offset_type const* d_offsets, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) + { + rmm::device_uvector indices(total_matches, stream); + + thrust::for_each_n(rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + d_strings.size(), + findall_fn{d_strings, d_prog, d_offsets, indices.data()}); + + return make_strings_column(indices.begin(), indices.end(), stream, mr); + } +}; + } // namespace // @@ -121,30 +142,11 @@ std::unique_ptr findall_record( rmm::exec_policy(stream), d_offsets, d_offsets + strings_count + 1, d_offsets); // Create indices vector with the total number of groups that will be extracted - auto total_matches = cudf::detail::get_value(offsets->view(), strings_count, stream); - - rmm::device_uvector indices(total_matches, stream); - auto d_indices = indices.data(); - auto begin = thrust::make_counting_iterator(0); - - // Build the string indices - auto const regex_insts = d_prog->insts_counts(); - if (regex_insts <= RX_SMALL_INSTS) { - findall_fn fn{*d_strings, *d_prog, d_offsets, d_indices}; - thrust::for_each_n(rmm::exec_policy(stream), begin, strings_count, fn); - } else if (regex_insts <= RX_MEDIUM_INSTS) { - findall_fn fn{*d_strings, *d_prog, d_offsets, d_indices}; - thrust::for_each_n(rmm::exec_policy(stream), begin, strings_count, fn); - } else if (regex_insts <= RX_LARGE_INSTS) { - findall_fn fn{*d_strings, *d_prog, d_offsets, d_indices}; - thrust::for_each_n(rmm::exec_policy(stream), begin, strings_count, fn); - } else { - findall_fn fn{*d_strings, *d_prog, d_offsets, d_indices}; - thrust::for_each_n(rmm::exec_policy(stream), begin, strings_count, fn); - } + auto const total_matches = + cudf::detail::get_value(offsets->view(), strings_count, stream); - // Build the child strings column from the resulting indices - auto strings_output = make_strings_column(indices.begin(), indices.end(), stream, mr); + auto strings_output = regex_dispatcher( + *d_prog, findall_dispatch_fn{*d_prog}, *d_strings, total_matches, d_offsets, stream, mr); // Build the lists column from the offsets and the strings return make_lists_column(strings_count, diff --git a/cpp/src/strings/split/split_re.cu b/cpp/src/strings/split/split_re.cu index d80148f2fe6..a8a2467dd76 100644 --- a/cpp/src/strings/split/split_re.cu +++ b/cpp/src/strings/split/split_re.cu @@ -15,6 +15,7 @@ */ #include +#include #include #include @@ -110,6 +111,28 @@ struct token_reader_fn { } }; +struct generate_dispatch_fn { + reprog_device d_prog; + + template + rmm::device_uvector operator()(column_device_view const& d_strings, + size_type total_tokens, + split_direction direction, + offset_type const* d_offsets, + rmm::cuda_stream_view stream) + { + rmm::device_uvector tokens(total_tokens, stream); + + thrust::for_each_n( + rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + d_strings.size(), + token_reader_fn{d_strings, d_prog, direction, d_offsets, tokens.data()}); + + return 
tokens; + } +}; + /** * @brief Call regex to split each input string into tokens. * @@ -148,24 +171,8 @@ rmm::device_uvector generate_tokens(column_device_view const& // the last offset entry is the total number of tokens to be generated auto const total_tokens = cudf::detail::get_value(offsets, strings_count, stream); - // generate tokens for each string - rmm::device_uvector tokens(total_tokens, stream); - auto const regex_insts = d_prog.insts_counts(); - if (regex_insts <= RX_SMALL_INSTS) { - token_reader_fn reader{d_strings, d_prog, direction, d_offsets, tokens.data()}; - thrust::for_each_n(rmm::exec_policy(stream), begin, strings_count, reader); - } else if (regex_insts <= RX_MEDIUM_INSTS) { - token_reader_fn reader{d_strings, d_prog, direction, d_offsets, tokens.data()}; - thrust::for_each_n(rmm::exec_policy(stream), begin, strings_count, reader); - } else if (regex_insts <= RX_LARGE_INSTS) { - token_reader_fn reader{d_strings, d_prog, direction, d_offsets, tokens.data()}; - thrust::for_each_n(rmm::exec_policy(stream), begin, strings_count, reader); - } else { - token_reader_fn reader{d_strings, d_prog, direction, d_offsets, tokens.data()}; - thrust::for_each_n(rmm::exec_policy(stream), begin, strings_count, reader); - } - - return tokens; + return regex_dispatcher( + d_prog, generate_dispatch_fn{d_prog}, d_strings, total_tokens, direction, d_offsets, stream); } /** diff --git a/cpp/src/unary/cast_ops.cu b/cpp/src/unary/cast_ops.cu index 5cc4ce5f6c9..f77ab7aa3d9 100644 --- a/cpp/src/unary/cast_ops.cu +++ b/cpp/src/unary/cast_ops.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
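Because regex_dispatcher is declared with a decltype(auto) return type, it can forward whatever the dispatch functor produces: a strings column in the replace and findall paths, a token vector in generate_tokens above. A small self-contained illustration of that return-type deduction, with toy result types standing in for the cuDF ones:

// decltype(auto) forwards the functor's exact return type, so one dispatcher
// can serve functors that build different result types. Types here are toys.
#include <iostream>
#include <memory>
#include <string>
#include <utility>
#include <vector>

template <typename Functor, typename... Ts>
decltype(auto) dispatch(Functor f, Ts&&... args)
{
  // a fixed template argument keeps the sketch short; the real dispatcher picks it at runtime
  return f.template operator()<64>(std::forward<Ts>(args)...);
}

struct make_string_fn {
  template <int stack_size>
  std::unique_ptr<std::string> operator()(char c) const
  {
    return std::make_unique<std::string>(std::size_t{stack_size}, c);
  }
};

struct make_vector_fn {
  template <int stack_size>
  std::vector<int> operator()(int value) const
  {
    return std::vector<int>(stack_size, value);
  }
};

int main()
{
  auto s = dispatch(make_string_fn{}, 'x');  // deduced as std::unique_ptr<std::string>
  auto v = dispatch(make_vector_fn{}, 7);    // deduced as std::vector<int>
  std::cout << s->size() << " " << v.size() << "\n";  // prints "64 64"
  return 0;
}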
@@ -34,19 +34,19 @@ namespace detail { namespace { // anonymous namespace template struct unary_cast { - template () && - cudf::is_numeric())>* = nullptr> + template < + typename SourceT, + typename TargetT = _TargetT, + std::enable_if_t<(cudf::is_numeric() && cudf::is_numeric())>* = nullptr> __device__ inline TargetT operator()(SourceT const element) { return static_cast(element); } - template () && - cudf::is_timestamp())>* = nullptr> + template < + typename SourceT, + typename TargetT = _TargetT, + std::enable_if_t<(cudf::is_timestamp() && cudf::is_timestamp())>* = nullptr> __device__ inline TargetT operator()(SourceT const element) { // Convert source tick counts into target tick counts without blindly truncating them @@ -55,46 +55,46 @@ struct unary_cast { return TargetT{cuda::std::chrono::floor(element.time_since_epoch())}; } - template () && - cudf::is_duration())>* = nullptr> + template < + typename SourceT, + typename TargetT = _TargetT, + std::enable_if_t<(cudf::is_duration() && cudf::is_duration())>* = nullptr> __device__ inline TargetT operator()(SourceT const element) { return TargetT{cuda::std::chrono::floor(element)}; } - template () && - cudf::is_duration()>* = nullptr> + template < + typename SourceT, + typename TargetT = _TargetT, + std::enable_if_t() && cudf::is_duration()>* = nullptr> __device__ inline TargetT operator()(SourceT const element) { return TargetT{static_cast(element)}; } - template () && - cudf::is_duration())>* = nullptr> + template < + typename SourceT, + typename TargetT = _TargetT, + std::enable_if_t<(cudf::is_timestamp() && cudf::is_duration())>* = nullptr> __device__ inline TargetT operator()(SourceT const element) { return TargetT{cuda::std::chrono::floor(element.time_since_epoch())}; } - template () && - cudf::is_numeric()>* = nullptr> + template < + typename SourceT, + typename TargetT = _TargetT, + std::enable_if_t() && cudf::is_numeric()>* = nullptr> __device__ inline TargetT operator()(SourceT const element) { return static_cast(element.count()); } - template () && - cudf::is_timestamp())>* = nullptr> + template < + typename SourceT, + typename TargetT = _TargetT, + std::enable_if_t<(cudf::is_duration() && cudf::is_timestamp())>* = nullptr> __device__ inline TargetT operator()(SourceT const element) { return TargetT{cuda::std::chrono::floor(element)}; @@ -107,20 +107,20 @@ struct fixed_point_unary_cast { using FixedPointT = std::conditional_t(), _SourceT, _TargetT>; using DeviceT = device_storage_type_t; - template () && - cudf::is_numeric())>* = nullptr> + template < + typename SourceT = _SourceT, + typename TargetT = _TargetT, + std::enable_if_t<(cudf::is_fixed_point<_SourceT>() && cudf::is_numeric())>* = nullptr> __device__ inline TargetT operator()(DeviceT const element) { auto const fp = SourceT{numeric::scaled_integer{element, scale}}; return static_cast(fp); } - template () && - cudf::is_fixed_point())>* = nullptr> + template < + typename SourceT = _SourceT, + typename TargetT = _TargetT, + std::enable_if_t<(cudf::is_numeric<_SourceT>() && cudf::is_fixed_point())>* = nullptr> __device__ inline DeviceT operator()(SourceT const element) { return TargetT{element, scale}.value(); @@ -169,7 +169,7 @@ struct device_cast { * * @return std::unique_ptr Returned column with new @p scale */ -template ()>* = nullptr> +template ()>* = nullptr> std::unique_ptr rescale(column_view input, numeric::scale_type scale, rmm::cuda_stream_view stream, @@ -207,10 +207,9 @@ struct dispatch_unary_cast_to { dispatch_unary_cast_to(column_view inp) : input(inp) {} - 
template < - typename TargetT, - typename SourceT = _SourceT, - typename std::enable_if_t()>* = nullptr> + template ()>* = nullptr> std::unique_ptr operator()(data_type type, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) @@ -234,10 +233,10 @@ struct dispatch_unary_cast_to { return output; } - template () && - cudf::is_numeric()>* = nullptr> + template < + typename TargetT, + typename SourceT = _SourceT, + std::enable_if_t() && cudf::is_numeric()>* = nullptr> std::unique_ptr operator()(data_type type, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) @@ -264,10 +263,10 @@ struct dispatch_unary_cast_to { return output; } - template () && - cudf::is_fixed_point()>* = nullptr> + template < + typename TargetT, + typename SourceT = _SourceT, + std::enable_if_t() && cudf::is_fixed_point()>* = nullptr> std::unique_ptr operator()(data_type type, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) @@ -294,11 +293,10 @@ struct dispatch_unary_cast_to { return output; } - template < - typename TargetT, - typename SourceT = _SourceT, - typename std::enable_if_t() && cudf::is_fixed_point() && - std::is_same_v>* = nullptr> + template () && cudf::is_fixed_point() && + std::is_same_v>* = nullptr> std::unique_ptr operator()(data_type type, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) @@ -310,11 +308,10 @@ struct dispatch_unary_cast_to { return detail::rescale(input, numeric::scale_type{type.scale()}, stream, mr); } - template < - typename TargetT, - typename SourceT = _SourceT, - typename std::enable_if_t() && cudf::is_fixed_point() && - not std::is_same_v>* = nullptr> + template () && cudf::is_fixed_point() && + not std::is_same_v>* = nullptr> std::unique_ptr operator()(data_type type, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) @@ -356,8 +353,8 @@ struct dispatch_unary_cast_to { } template ()>* = nullptr> + typename SourceT = _SourceT, + std::enable_if_t()>* = nullptr> std::unique_ptr operator()(data_type, rmm::cuda_stream_view, rmm::mr::device_memory_resource*) @@ -379,7 +376,7 @@ struct dispatch_unary_cast_from { dispatch_unary_cast_from(column_view inp) : input(inp) {} - template ()>* = nullptr> + template ()>* = nullptr> std::unique_ptr operator()(data_type type, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) diff --git a/cpp/src/unary/math_ops.cu b/cpp/src/unary/math_ops.cu index 474c7b76ddc..e92d5a1ca7e 100644 --- a/cpp/src/unary/math_ops.cu +++ b/cpp/src/unary/math_ops.cu @@ -348,7 +348,7 @@ std::unique_ptr transform_fn(cudf::dictionary_column_view const& i template struct MathOpDispatcher { - template >* = nullptr> + template >* = nullptr> std::unique_ptr operator()(cudf::column_view const& input, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) @@ -362,7 +362,7 @@ struct MathOpDispatcher { } struct dictionary_dispatch { - template >* = nullptr> + template >* = nullptr> std::unique_ptr operator()(cudf::dictionary_column_view const& input, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) @@ -377,9 +377,9 @@ struct MathOpDispatcher { } }; - template and - std::is_same_v>* = nullptr> + template < + typename T, + std::enable_if_t and std::is_same_v>* = nullptr> std::unique_ptr operator()(cudf::column_view const& input, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) @@ -401,7 +401,7 @@ struct MathOpDispatcher { template struct BitwiseOpDispatcher { - template >* = nullptr> + template >* = nullptr> std::unique_ptr 
operator()(cudf::column_view const& input, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) @@ -415,7 +415,7 @@ struct BitwiseOpDispatcher { } struct dictionary_dispatch { - template >* = nullptr> + template >* = nullptr> std::unique_ptr operator()(cudf::dictionary_column_view const& input, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) @@ -431,8 +431,7 @@ struct BitwiseOpDispatcher { }; template and std::is_same_v>* = - nullptr> + std::enable_if_t and std::is_same_v>* = nullptr> std::unique_ptr operator()(cudf::column_view const& input, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) @@ -462,7 +461,7 @@ struct LogicalOpDispatcher { } public: - template ()>* = nullptr> + template ()>* = nullptr> std::unique_ptr operator()(cudf::column_view const& input, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) @@ -477,7 +476,7 @@ struct LogicalOpDispatcher { } struct dictionary_dispatch { - template ()>* = nullptr> + template ()>* = nullptr> std::unique_ptr operator()(cudf::dictionary_column_view const& input, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) @@ -499,9 +498,8 @@ struct LogicalOpDispatcher { } }; - template < - typename T, - typename std::enable_if_t() and std::is_same_v>* = nullptr> + template () and std::is_same_v>* = nullptr> std::unique_ptr operator()(cudf::column_view const& input, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) diff --git a/cpp/tests/binaryop/binop-fixture.hpp b/cpp/tests/binaryop/binop-fixture.hpp index 65243b1ae2e..2ba5561826e 100644 --- a/cpp/tests/binaryop/binop-fixture.hpp +++ b/cpp/tests/binaryop/binop-fixture.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Copyright 2018-2019 BlazingDB, Inc. * Copyright 2018 Christian Noboa Mardini @@ -61,14 +61,14 @@ struct BinaryOperationTest : public cudf::test::BaseFixture { return cudf::test::fixed_width_column_wrapper(data_iter, data_iter + size, validity_iter); } - template >* = nullptr> + template >* = nullptr> auto make_random_wrapped_scalar() { cudf::test::UniformRandomGenerator rand_gen(r_min, r_max); return cudf::scalar_type_t(rand_gen.generate()); } - template >* = nullptr> + template >* = nullptr> auto make_random_wrapped_scalar() { cudf::test::UniformRandomGenerator rand_gen(r_min, r_max); diff --git a/cpp/tests/binaryop/util/operation.h b/cpp/tests/binaryop/util/operation.h index d78ad8938c4..93a84a7bc49 100644 --- a/cpp/tests/binaryop/util/operation.h +++ b/cpp/tests/binaryop/util/operation.h @@ -32,19 +32,19 @@ template struct Add { // Allow sum between chronos only when both input and output types // are chronos. Unsupported combinations will fail to compile - template () && cudf::is_chrono() && - cudf::is_chrono(), - void>::type* = nullptr> + template () && cudf::is_chrono() && + cudf::is_chrono(), + void>* = nullptr> OutT operator()(TypeLhs lhs, TypeRhs rhs) const { return lhs + rhs; } - template () || !cudf::is_chrono() || - !cudf::is_chrono(), - void>::type* = nullptr> + template () || !cudf::is_chrono() || + !cudf::is_chrono(), + void>* = nullptr> OutT operator()(TypeLhs lhs, TypeRhs rhs) const { using TypeCommon = typename std::common_type::type; @@ -56,19 +56,19 @@ template struct Sub { // Allow difference between chronos only when both input and output types // are chronos. 
Unsupported combinations will fail to compile - template () && cudf::is_chrono() && - cudf::is_chrono(), - void>::type* = nullptr> + template () && cudf::is_chrono() && + cudf::is_chrono(), + void>* = nullptr> OutT operator()(TypeLhs lhs, TypeRhs rhs) const { return lhs - rhs; } - template () || !cudf::is_chrono() || - !cudf::is_chrono(), - void>::type* = nullptr> + template () || !cudf::is_chrono() || + !cudf::is_chrono(), + void>* = nullptr> OutT operator()(TypeLhs lhs, TypeRhs rhs) const { using TypeCommon = typename std::common_type::type; @@ -78,28 +78,27 @@ struct Sub { template struct Mul { - template ::value, void>::type* = nullptr> + template ::value, void>* = nullptr> TypeOut operator()(TypeLhs lhs, TypeRhs rhs) const { using TypeCommon = typename std::common_type::type; return static_cast(static_cast(lhs) * static_cast(rhs)); } - template ::value, void>::type* = nullptr> + template ::value, void>* = nullptr> TypeOut operator()(TypeLhs x, TypeRhs y) const { return DurationProduct(x, y); } - template < - typename OutT, - typename LhsT, - typename RhsT, - typename std::enable_if<(cudf::is_duration_t::value && std::is_integral_v) || - (cudf::is_duration_t::value && std::is_integral_v), - void>::type* = nullptr> + template ::value && std::is_integral_v) || + (cudf::is_duration_t::value && std::is_integral_v), + void>* = nullptr> OutT DurationProduct(LhsT x, RhsT y) const { return x * y; @@ -108,26 +107,26 @@ struct Mul { template struct Div { - template ::value, void>::type* = nullptr> + template ::value, void>* = nullptr> TypeOut operator()(TypeLhs lhs, TypeRhs rhs) { using TypeCommon = typename std::common_type::type; return static_cast(static_cast(lhs) / static_cast(rhs)); } - template ::value, void>::type* = nullptr> + template ::value, void>* = nullptr> TypeOut operator()(TypeLhs x, TypeRhs y) const { return DurationDivide(x, y); } - template || cudf::is_duration()), - void>::type* = nullptr> + template < + typename OutT, + typename LhsT, + typename RhsT, + std::enable_if_t<(std::is_integral_v || cudf::is_duration()), void>* = nullptr> OutT DurationDivide(LhsT x, RhsT y) const { return x / y; @@ -185,10 +184,10 @@ struct Mod { } // Mod with duration types - duration % (integral or a duration) = duration - template ::value && - cudf::is_duration_t::value>* = nullptr> + template ::value && + cudf::is_duration_t::value>* = nullptr> TypeOut operator()(TypeLhs lhs, TypeRhs rhs) { return lhs % rhs; diff --git a/cpp/tests/copying/concatenate_tests.cu b/cpp/tests/copying/concatenate_tests.cu index a306736d131..ec7fae58f98 100644 --- a/cpp/tests/copying/concatenate_tests.cu +++ b/cpp/tests/copying/concatenate_tests.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -340,6 +340,22 @@ struct OverflowTest : public cudf::test::BaseFixture { TEST_F(OverflowTest, OverflowTest) { using namespace cudf; + // should concatenate up to size_type::max rows. 
+ { + // 5 x size + size_last adds to size_type::max + constexpr auto size = static_cast(static_cast(250) * 1024 * 1024); + constexpr auto size_last = static_cast(836763647); + + auto many_chars = cudf::make_fixed_width_column(data_type{type_id::INT8}, size); + auto many_chars_last = cudf::make_fixed_width_column(data_type{type_id::INT8}, size_last); + + table_view tbl({*many_chars}); + table_view tbl_last({*many_chars_last}); + std::vector table_views_to_concat({tbl, tbl, tbl, tbl, tbl, tbl_last}); + std::unique_ptr concatenated_tables = cudf::concatenate(table_views_to_concat); + EXPECT_NO_THROW(rmm::cuda_stream_default.synchronize()); + ASSERT_EQ(concatenated_tables->num_rows(), std::numeric_limits::max()); + } // primitive column { diff --git a/cpp/tests/copying/copy_tests.cpp b/cpp/tests/copying/copy_tests.cpp index 4254794bf19..62f1300c284 100644 --- a/cpp/tests/copying/copy_tests.cpp +++ b/cpp/tests/copying/copy_tests.cpp @@ -378,18 +378,16 @@ TYPED_TEST(CopyTestNumeric, CopyIfElseTestScalarScalar) template struct create_chrono_scalar { template - typename std::enable_if_t< - std::is_same_v::type, std::true_type>, - cudf::timestamp_scalar> + std::enable_if_t::type, std::true_type>, + cudf::timestamp_scalar> operator()(Args&&... args) const { return cudf::timestamp_scalar(std::forward(args)...); } template - typename std::enable_if_t< - std::is_same_v::type, std::true_type>, - cudf::duration_scalar> + std::enable_if_t::type, std::true_type>, + cudf::duration_scalar> operator()(Args&&... args) const { return cudf::duration_scalar(std::forward(args)...); diff --git a/cpp/tests/device_atomics/device_atomics_test.cu b/cpp/tests/device_atomics/device_atomics_test.cu index fd065249c4e..31174d3fd72 100644 --- a/cpp/tests/device_atomics/device_atomics_test.cu +++ b/cpp/tests/device_atomics/device_atomics_test.cu @@ -51,7 +51,7 @@ constexpr inline bool is_timestamp_sum() // Disable SUM of TIMESTAMP types template ()>* = nullptr> + std::enable_if_t()>* = nullptr> __device__ T atomic_op(T* addr, T const& value, BinaryOp op) { return {}; @@ -59,7 +59,7 @@ __device__ T atomic_op(T* addr, T const& value, BinaryOp op) template ()>* = nullptr> + std::enable_if_t()>* = nullptr> __device__ T atomic_op(T* addr, T const& value, BinaryOp op) { T old_value = *addr; @@ -92,13 +92,13 @@ __global__ void gpu_atomicCAS_test(T* result, T* data, size_t size) } template -typename std::enable_if_t(), T> accumulate(cudf::host_span xs) +std::enable_if_t(), T> accumulate(cudf::host_span xs) { return std::accumulate(xs.begin(), xs.end(), T{0}); } template -typename std::enable_if_t(), T> accumulate(cudf::host_span xs) +std::enable_if_t(), T> accumulate(cudf::host_span xs) { auto ys = std::vector(xs.size()); std::transform( diff --git a/cpp/tests/groupby/tdigest_tests.cu b/cpp/tests/groupby/tdigest_tests.cu index 2591f395914..b0ce22bae7c 100644 --- a/cpp/tests/groupby/tdigest_tests.cu +++ b/cpp/tests/groupby/tdigest_tests.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
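The new overflow test above concatenates five INT8 columns of 250 * 1024 * 1024 rows plus one column of 836763647 rows, which lands exactly on the 32-bit size_type maximum. A quick compile-time check of that arithmetic (assuming, as in libcudf, that size_type is a signed 32-bit integer):

// Checks the row-count arithmetic used by the concatenate overflow test:
// 5 * (250 * 1024 * 1024) + 836'763'647 == numeric_limits<int32_t>::max().
#include <cstdint>
#include <limits>

int main()
{
  constexpr std::int64_t size      = std::int64_t{250} * 1024 * 1024;  // 262'144'000 rows per column
  constexpr std::int64_t size_last = 836'763'647;
  static_assert(5 * size + size_last == std::numeric_limits<std::int32_t>::max(),
                "five full columns plus the last column must sum to size_type::max");
  return 0;
}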
@@ -60,9 +60,8 @@ struct column_max { }; struct tdigest_gen { - template < - typename T, - typename std::enable_if_t() || cudf::is_fixed_point()>* = nullptr> + template () || cudf::is_fixed_point()>* = nullptr> std::unique_ptr operator()(column_view const& keys, column_view const& values, int delta) { cudf::table_view t({keys}); @@ -75,9 +74,8 @@ struct tdigest_gen { return std::move(result.second[0].results[0]); } - template < - typename T, - typename std::enable_if_t() && !cudf::is_fixed_point()>* = nullptr> + template () && !cudf::is_fixed_point()>* = nullptr> std::unique_ptr operator()(column_view const& keys, column_view const& values, int delta) { CUDF_FAIL("Invalid tdigest test type"); diff --git a/cpp/tests/io/csv_test.cpp b/cpp/tests/io/csv_test.cpp index 08cdbb10359..e5e44b1aa6e 100644 --- a/cpp/tests/io/csv_test.cpp +++ b/cpp/tests/io/csv_test.cpp @@ -262,7 +262,7 @@ void check_string_column(cudf::column_view const& col_lhs, } // Helper function to compare two floating-point column contents -template >* = nullptr> +template >* = nullptr> void expect_column_data_equal(std::vector const& lhs, cudf::column_view const& rhs) { EXPECT_THAT(cudf::test::to_host(rhs).first, @@ -270,7 +270,7 @@ void expect_column_data_equal(std::vector const& lhs, cudf::column_view const } // Helper function to compare two column contents -template >* = nullptr> +template >* = nullptr> void expect_column_data_equal(std::vector const& lhs, cudf::column_view const& rhs) { EXPECT_THAT(cudf::test::to_host(rhs).first, ::testing::ElementsAreArray(lhs)); diff --git a/cpp/tests/io/text/multibyte_split_test.cpp b/cpp/tests/io/text/multibyte_split_test.cpp index 27a8be95e9b..cfd1a16f19a 100644 --- a/cpp/tests/io/text/multibyte_split_test.cpp +++ b/cpp/tests/io/text/multibyte_split_test.cpp @@ -21,6 +21,8 @@ #include #include +#include +#include #include #include #include @@ -142,4 +144,29 @@ TEST_F(MultibyteSplitTest, HandpickedInput) CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *out, debug_output_level::ALL_ERRORS); } +TEST_F(MultibyteSplitTest, LargeInputMultipleRange) +{ + auto host_input = std::string(); + auto host_expected = std::vector(); + + for (auto i = 0; i < 1000; i++) { + host_input += "...:|"; + } + + auto delimiter = std::string("...:|"); + auto source = cudf::io::text::make_source(host_input); + + auto byte_ranges = cudf::io::text::create_byte_range_infos_consecutive(host_input.size(), 3); + auto out0 = cudf::io::text::multibyte_split(*source, delimiter, byte_ranges[0]); + auto out1 = cudf::io::text::multibyte_split(*source, delimiter, byte_ranges[1]); + auto out2 = cudf::io::text::multibyte_split(*source, delimiter, byte_ranges[2]); + + auto out_views = std::vector({out0->view(), out1->view(), out2->view()}); + auto out = cudf::concatenate(out_views); + + auto expected = cudf::io::text::multibyte_split(*source, delimiter); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected->view(), *out, debug_output_level::ALL_ERRORS); +} + CUDF_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/quantiles/percentile_approx_test.cu b/cpp/tests/quantiles/percentile_approx_test.cu index 2f4d5a7a604..035cd664aa2 100644 --- a/cpp/tests/quantiles/percentile_approx_test.cu +++ b/cpp/tests/quantiles/percentile_approx_test.cu @@ -1,3 +1,18 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ #include #include @@ -21,9 +36,8 @@ using namespace cudf; using namespace cudf::tdigest; struct tdigest_gen { - template < - typename T, - typename std::enable_if_t() || cudf::is_fixed_point()>* = nullptr> + template () || cudf::is_fixed_point()>* = nullptr> std::unique_ptr operator()(column_view const& keys, column_view const& values, int delta) { cudf::table_view t({keys}); @@ -36,9 +50,8 @@ struct tdigest_gen { return std::move(result.second[0].results[0]); } - template < - typename T, - typename std::enable_if_t() && !cudf::is_fixed_point()>* = nullptr> + template () && !cudf::is_fixed_point()>* = nullptr> std::unique_ptr operator()(column_view const& keys, column_view const& values, int delta) { CUDF_FAIL("Invalid tdigest test type"); @@ -89,9 +102,8 @@ std::unique_ptr arrow_percentile_approx(column_view const& _values, } struct percentile_approx_dispatch { - template < - typename T, - typename std::enable_if_t() || cudf::is_fixed_point()>* = nullptr> + template () || cudf::is_fixed_point()>* = nullptr> std::unique_ptr operator()(column_view const& keys, column_view const& values, int delta, @@ -127,9 +139,8 @@ struct percentile_approx_dispatch { return result; } - template < - typename T, - typename std::enable_if_t() && !cudf::is_fixed_point()>* = nullptr> + template () && !cudf::is_fixed_point()>* = nullptr> std::unique_ptr operator()(column_view const& keys, column_view const& values, int delta, diff --git a/cpp/tests/reductions/reduction_tests.cpp b/cpp/tests/reductions/reduction_tests.cpp index fcecc124978..276b244dac6 100644 --- a/cpp/tests/reductions/reduction_tests.cpp +++ b/cpp/tests/reductions/reduction_tests.cpp @@ -38,7 +38,7 @@ using aggregation = cudf::aggregation; template -typename std::enable_if::value, std::vector>::type convert_values( +std::enable_if_t::value, std::vector> convert_values( std::vector const& int_values) { std::vector v(int_values.size()); @@ -50,7 +50,7 @@ typename std::enable_if::value, std::vector>::type c } template -typename std::enable_if::value, std::vector>::type convert_values( +std::enable_if_t::value, std::vector> convert_values( std::vector const& int_values) { std::vector v(int_values.size()); diff --git a/cpp/tests/reductions/scan_tests.hpp b/cpp/tests/reductions/scan_tests.hpp index 346103de85b..858697d8ef5 100644 --- a/cpp/tests/reductions/scan_tests.hpp +++ b/cpp/tests/reductions/scan_tests.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
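The test cleanups in this change consistently replace the pre-C++14 spelling typename std::enable_if<cond, T>::type with the alias std::enable_if_t<cond, T>, as in convert_values and make_vector above. A short sketch showing that the two spellings name the same type; the helper names here are hypothetical:

// Old and new enable_if spellings are interchangeable; the alias is just terser.
#include <type_traits>
#include <vector>

// pre-C++14 spelling
template <typename T>
typename std::enable_if<std::is_integral<T>::value, std::vector<T>>::type
make_values_old(std::size_t n)
{
  return std::vector<T>(n, T{1});
}

// C++14 alias-template spelling used throughout this change
template <typename T>
std::enable_if_t<std::is_integral_v<T>, std::vector<T>>
make_values_new(std::size_t n)
{
  return std::vector<T>(n, T{1});
}

static_assert(std::is_same_v<typename std::enable_if<true, int>::type, std::enable_if_t<true, int>>,
              "the alias names exactly the same type");

int main()
{
  auto a = make_values_old<int>(3);
  auto b = make_values_new<long>(3);
  return static_cast<int>(a.size() + b.size()) - 6;  // 0 on success
}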
@@ -58,25 +58,23 @@ struct TypeParam_to_host_type { }; template -typename std::enable_if, - thrust::host_vector>::type +std::enable_if_t, thrust::host_vector> make_vector(std::initializer_list const& init) { return cudf::test::make_type_param_vector(init); } template -typename std::enable_if(), - thrust::host_vector>::type +std::enable_if_t(), thrust::host_vector> make_vector(std::initializer_list const& init) { return cudf::test::make_type_param_vector(init); } template -typename std::enable_if || - cudf::is_fixed_point()), - thrust::host_vector>::type +std::enable_if_t || + cudf::is_fixed_point()), + thrust::host_vector> make_vector(std::initializer_list const& init) { return cudf::test::make_type_param_vector(init); diff --git a/cpp/tests/sort/is_sorted_tests.cpp b/cpp/tests/sort/is_sorted_tests.cpp index 7d277059ef7..44fa83204ee 100644 --- a/cpp/tests/sort/is_sorted_tests.cpp +++ b/cpp/tests/sort/is_sorted_tests.cpp @@ -36,8 +36,7 @@ namespace testdata { // ----- most numerics template -typename std::enable_if && !std::is_same_v, - fixed_width_column_wrapper>::type +std::enable_if_t && !std::is_same_v, fixed_width_column_wrapper> ascending() { return std::is_signed_v ? fixed_width_column_wrapper({std::numeric_limits::lowest(), @@ -58,8 +57,7 @@ ascending() } template -typename std::enable_if && !std::is_same_v, - fixed_width_column_wrapper>::type +std::enable_if_t && !std::is_same_v, fixed_width_column_wrapper> descending() { return std::is_signed_v ? fixed_width_column_wrapper({std::numeric_limits::max(), @@ -100,14 +98,13 @@ auto nulls_before() // ----- bool template -typename std::enable_if, fixed_width_column_wrapper>::type ascending() +std::enable_if_t, fixed_width_column_wrapper> ascending() { return fixed_width_column_wrapper({false, false, true, true}); } template -typename std::enable_if, fixed_width_column_wrapper>::type -descending() +std::enable_if_t, fixed_width_column_wrapper> descending() { return fixed_width_column_wrapper({true, true, false, false}); } @@ -115,13 +112,13 @@ descending() // ----- chrono types template -typename std::enable_if(), fixed_width_column_wrapper>::type ascending() +std::enable_if_t(), fixed_width_column_wrapper> ascending() { return fixed_width_column_wrapper({T::min(), T::max()}); } template -typename std::enable_if(), fixed_width_column_wrapper>::type descending() +std::enable_if_t(), fixed_width_column_wrapper> descending() { return fixed_width_column_wrapper({T::max(), T::min()}); } @@ -129,15 +126,13 @@ typename std::enable_if(), fixed_width_column_wrapper>::ty // ----- string_view template -typename std::enable_if, strings_column_wrapper>::type -ascending() +std::enable_if_t, strings_column_wrapper> ascending() { return strings_column_wrapper({"A", "B"}); } template -typename std::enable_if, strings_column_wrapper>::type -descending() +std::enable_if_t, strings_column_wrapper> descending() { return strings_column_wrapper({"B", "A"}); } @@ -163,8 +158,7 @@ auto nulls_before() // ----- struct_view {"nestedInt" : {"Int" : 0 }, "float" : 1} template -typename std::enable_if, structs_column_wrapper>::type -ascending() +std::enable_if_t, structs_column_wrapper> ascending() { using T1 = int32_t; auto int_col = fixed_width_column_wrapper({std::numeric_limits::lowest(), @@ -182,8 +176,7 @@ ascending() } template -typename std::enable_if, structs_column_wrapper>::type -descending() +std::enable_if_t, structs_column_wrapper> descending() { using T1 = int32_t; auto int_col = fixed_width_column_wrapper({std::numeric_limits::max(), diff --git 
a/cpp/tests/utilities/column_utilities.cu b/cpp/tests/utilities/column_utilities.cu index 5403d56318e..9daf70227f8 100644 --- a/cpp/tests/utilities/column_utilities.cu +++ b/cpp/tests/utilities/column_utilities.cu @@ -836,13 +836,13 @@ std::vector bitmask_to_host(cudf::column_view const& c) namespace { -template >* = nullptr> +template >* = nullptr> static auto numeric_to_string_precise(T value) { return std::to_string(value); } -template >* = nullptr> +template >* = nullptr> static auto numeric_to_string_precise(T value) { std::ostringstream o; @@ -915,7 +915,7 @@ std::string nested_offsets_to_string(NestedColumnView const& c, std::string cons } struct column_view_printer { - template ()>* = nullptr> + template ()>* = nullptr> void operator()(cudf::column_view const& col, std::vector& out, std::string const&) { auto h_data = cudf::test::to_host(col); @@ -939,7 +939,7 @@ struct column_view_printer { } } - template ()>* = nullptr> + template ()>* = nullptr> void operator()(cudf::column_view const& col, std::vector& out, std::string const& indent) @@ -965,7 +965,7 @@ struct column_view_printer { this->template operator()(*col_as_strings, out, indent); } - template ()>* = nullptr> + template ()>* = nullptr> void operator()(cudf::column_view const& col, std::vector& out, std::string const&) { auto const h_data = cudf::test::to_host(col); @@ -987,7 +987,7 @@ struct column_view_printer { } template >* = nullptr> + std::enable_if_t>* = nullptr> void operator()(cudf::column_view const& col, std::vector& out, std::string const&) { // @@ -1008,7 +1008,7 @@ struct column_view_printer { } template >* = nullptr> + std::enable_if_t>* = nullptr> void operator()(cudf::column_view const& col, std::vector& out, std::string const&) { cudf::dictionary_column_view dictionary(col); @@ -1029,7 +1029,7 @@ struct column_view_printer { } // Print the tick counts with the units - template ()>* = nullptr> + template ()>* = nullptr> void operator()(cudf::column_view const& col, std::vector& out, std::string const&) { auto h_data = cudf::test::to_host(col); @@ -1054,8 +1054,7 @@ struct column_view_printer { } } - template >* = nullptr> + template >* = nullptr> void operator()(cudf::column_view const& col, std::vector& out, std::string const& indent) @@ -1084,7 +1083,7 @@ struct column_view_printer { } template >* = nullptr> + std::enable_if_t>* = nullptr> void operator()(cudf::column_view const& col, std::vector& out, std::string const& indent) diff --git a/cpp/tests/wrappers/timestamps_test.cu b/cpp/tests/wrappers/timestamps_test.cu index 097b786aefe..48500c84942 100644 --- a/cpp/tests/wrappers/timestamps_test.cu +++ b/cpp/tests/wrappers/timestamps_test.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
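The column_view_printer overloads touched below rely on the std::enable_if_t<condition>* = nullptr idiom: each operator() is dropped from overload resolution unless its condition holds, so exactly one overload survives for a given element type. A standalone sketch of that selection mechanism, with a made-up value_printer in place of the cudf utility:

// Sketch of SFINAE via a defaulted non-type template parameter: when the
// condition is false, enable_if_t has no type and the overload is discarded.
#include <iostream>
#include <string>
#include <type_traits>

struct value_printer {
  template <typename T, std::enable_if_t<std::is_integral_v<T>>* = nullptr>
  void operator()(T v) const
  {
    std::cout << "integral: " << v << "\n";
  }

  template <typename T, std::enable_if_t<std::is_floating_point_v<T>>* = nullptr>
  void operator()(T v) const
  {
    std::cout << "floating point: " << v << "\n";
  }

  template <typename T, std::enable_if_t<std::is_same_v<T, std::string>>* = nullptr>
  void operator()(T const& v) const
  {
    std::cout << "string: " << v << "\n";
  }
};

int main()
{
  value_printer p;
  p(42);                    // selects the integral overload
  p(3.5);                   // selects the floating-point overload
  p(std::string{"hello"});  // selects the string overload
  return 0;
}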
@@ -50,7 +50,7 @@ struct compare_chrono_elements_to_primitive_representation { { } - template ()>* = nullptr> + template ()>* = nullptr> __host__ __device__ bool operator()(const int32_t element_index) { using Primitive = typename ChronoT::rep; @@ -59,7 +59,7 @@ struct compare_chrono_elements_to_primitive_representation { return primitive == timestamp.time_since_epoch().count(); } - template ()>* = nullptr> + template ()>* = nullptr> __host__ __device__ bool operator()(const int32_t element_index) { using Primitive = typename ChronoT::rep; diff --git a/docs/cudf/source/api_docs/dataframe.rst b/docs/cudf/source/api_docs/dataframe.rst index 2de55553c3f..7a7c9c195b2 100644 --- a/docs/cudf/source/api_docs/dataframe.rst +++ b/docs/cudf/source/api_docs/dataframe.rst @@ -209,8 +209,8 @@ Reshaping, sorting, transposing DataFrame.T DataFrame.transpose -Combining / comparing / joining / merging / encoding -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Combining / comparing / joining / merging +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autosummary:: :toctree: api/ @@ -219,8 +219,6 @@ Combining / comparing / joining / merging / encoding DataFrame.join DataFrame.merge DataFrame.update - DataFrame.label_encoding - DataFrame.one_hot_encoding Numerical operations ~~~~~~~~~~~~~~~~~~~~ @@ -249,8 +247,6 @@ Serialization / IO / conversion .. autosummary:: :toctree: api/ - DataFrame.as_gpu_matrix - DataFrame.as_matrix DataFrame.from_arrow DataFrame.from_pandas DataFrame.from_records diff --git a/docs/cudf/source/api_docs/index_objects.rst b/docs/cudf/source/api_docs/index_objects.rst index d705504cc0c..b7b358e38be 100644 --- a/docs/cudf/source/api_docs/index_objects.rst +++ b/docs/cudf/source/api_docs/index_objects.rst @@ -34,7 +34,7 @@ Properties Index.shape Index.size Index.values - + Modifying and computations ~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -94,7 +94,6 @@ Conversion Index.astype Index.to_arrow Index.to_list - Index.to_numpy Index.to_series Index.to_frame Index.to_pandas diff --git a/docs/cudf/source/api_docs/series.rst b/docs/cudf/source/api_docs/series.rst index cf5dd4a2a1d..376acf1694b 100644 --- a/docs/cudf/source/api_docs/series.rst +++ b/docs/cudf/source/api_docs/series.rst @@ -44,7 +44,6 @@ Conversion Series.copy Series.to_list Series.__array__ - Series.as_mask Series.scale @@ -172,9 +171,7 @@ Reindexing / selection / label manipulation Series.reindex Series.rename Series.reset_index - Series.reverse Series.sample - Series.set_mask Series.take Series.tail Series.tile @@ -210,15 +207,13 @@ Reshaping, sorting Series.repeat Series.transpose -Combining / comparing / joining / merging / encoding ----------------------------------------------------- +Combining / comparing / joining / merging +----------------------------------------- .. autosummary:: :toctree: api/ Series.append Series.update - Series.label_encoding - Series.one_hot_encoding Numerical operations ~~~~~~~~~~~~~~~~~~~~ @@ -409,12 +404,10 @@ Serialization / IO / conversion :toctree: api/ Series.to_arrow - Series.to_cupy Series.to_dlpack Series.to_frame Series.to_hdf Series.to_json - Series.to_numpy Series.to_pandas Series.to_string Series.from_arrow diff --git a/python/cudf/cudf/_lib/cpp/io/text.pxd b/python/cudf/cudf/_lib/cpp/io/text.pxd index 9ce0c68cb08..5b110d6234c 100644 --- a/python/cudf/cudf/_lib/cpp/io/text.pxd +++ b/python/cudf/cudf/_lib/cpp/io/text.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2021, NVIDIA CORPORATION. +# Copyright (c) 2020-2022, NVIDIA CORPORATION. 
from libcpp.memory cimport unique_ptr from libcpp.string cimport string @@ -6,6 +6,13 @@ from libcpp.string cimport string from cudf._lib.cpp.column.column cimport column +cdef extern from "cudf/io/text/byte_range_info.hpp" \ + namespace "cudf::io::text" nogil: + + cdef cppclass byte_range_info: + byte_range_info() except + + byte_range_info(size_t offset, size_t size) except + + cdef extern from "cudf/io/text/data_chunk_source.hpp" \ namespace "cudf::io::text" nogil: @@ -25,3 +32,7 @@ cdef extern from "cudf/io/text/multibyte_split.hpp" \ unique_ptr[column] multibyte_split(data_chunk_source source, string delimiter) except + + + unique_ptr[column] multibyte_split(data_chunk_source source, + string delimiter, + byte_range_info byte_range) except + diff --git a/python/cudf/cudf/_lib/text.pyx b/python/cudf/cudf/_lib/text.pyx index 9f33f32bdaf..daea227cc39 100644 --- a/python/cudf/cudf/_lib/text.pyx +++ b/python/cudf/cudf/_lib/text.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2021, NVIDIA CORPORATION. +# Copyright (c) 2020-2022, NVIDIA CORPORATION. import cudf @@ -10,6 +10,7 @@ from libcpp.utility cimport move from cudf._lib.column cimport Column from cudf._lib.cpp.column.column cimport column from cudf._lib.cpp.io.text cimport ( + byte_range_info, data_chunk_source, make_source, make_source_from_file, @@ -18,7 +19,8 @@ from cudf._lib.cpp.io.text cimport ( def read_text(object filepaths_or_buffers, - object delimiter=None): + object delimiter=None, + object byte_range=None): """ Cython function to call into libcudf API, see `multibyte_split`. @@ -31,9 +33,25 @@ def read_text(object filepaths_or_buffers, cdef unique_ptr[data_chunk_source] datasource cdef unique_ptr[column] c_col - - with nogil: - datasource = move(make_source_from_file(filename)) - c_col = move(multibyte_split(dereference(datasource), delim)) + cdef size_t c_byte_range_offset + cdef size_t c_byte_range_size + cdef byte_range_info c_byte_range + + if (byte_range is not None): + c_byte_range_offset = byte_range[0] + c_byte_range_size = byte_range[1] + with nogil: + datasource = move(make_source_from_file(filename)) + c_byte_range = byte_range_info( + c_byte_range_offset, + c_byte_range_size) + c_col = move(multibyte_split( + dereference(datasource), + delim, + c_byte_range)) + else: + with nogil: + datasource = move(make_source_from_file(filename)) + c_col = move(multibyte_split(dereference(datasource), delim)) return {None: Column.from_unique_ptr(move(c_col))} diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index d5edbd7284e..510bf0208b7 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -46,7 +46,7 @@ ) from cudf.core.column_accessor import ColumnAccessor from cudf.core.join import Merge, MergeSemi -from cudf.core.mixins import Scannable +from cudf.core.mixins import BinaryOperand, Scannable from cudf.core.window import Rolling from cudf.utils import ioutils from cudf.utils.docutils import copy_docstring @@ -98,7 +98,7 @@ } -class Frame(Scannable): +class Frame(BinaryOperand, Scannable): """A collection of Column objects with an optional index. 
Parameters @@ -115,6 +115,8 @@ class Frame(Scannable): _index: Optional[cudf.core.index.BaseIndex] _names: Optional[List] + _VALID_BINARY_OPERATIONS = BinaryOperand._SUPPORTED_BINARY_OPERATIONS + _VALID_SCANS = { "cumsum", "cumprod", @@ -3571,13 +3573,7 @@ def _unaryop(self, op): ) def _binaryop( - self, - other: T, - fn: str, - fill_value: Any = None, - reflect: bool = False, - *args, - **kwargs, + self, other: T, op: str, fill_value: Any = None, *args, **kwargs, ) -> Frame: """Perform a binary operation between two frames. @@ -3585,15 +3581,11 @@ def _binaryop( ---------- other : Frame The second operand. - fn : str + op : str The operation to perform. fill_value : Any, default None The value to replace null values with. If ``None``, nulls are not filled before the operation. - reflect : bool, default False - If ``True``, swap the order of the operands. See - https://docs.python.org/3/reference/datamodel.html#object.__ror__ - for more information on when this is necessary. Returns ------- @@ -3633,6 +3625,7 @@ def _colwise_binop( A dict of columns constructed from the result of performing the requested operation on the operands. """ + fn = fn[2:-2] # Now actually perform the binop on the columns in left and right. output = {} @@ -3915,83 +3908,12 @@ def dot(self, other, reflect=False): return cudf.DataFrame(result) return result.item() - # Binary arithmetic operations. - def __add__(self, other): - return self._binaryop(other, "add") - - def __radd__(self, other): - return self._binaryop(other, "add", reflect=True) - - def __sub__(self, other): - return self._binaryop(other, "sub") - - def __rsub__(self, other): - return self._binaryop(other, "sub", reflect=True) - def __matmul__(self, other): return self.dot(other) def __rmatmul__(self, other): return self.dot(other, reflect=True) - def __mul__(self, other): - return self._binaryop(other, "mul") - - def __rmul__(self, other): - return self._binaryop(other, "mul", reflect=True) - - def __mod__(self, other): - return self._binaryop(other, "mod") - - def __rmod__(self, other): - return self._binaryop(other, "mod", reflect=True) - - def __pow__(self, other): - return self._binaryop(other, "pow") - - def __rpow__(self, other): - return self._binaryop(other, "pow", reflect=True) - - def __floordiv__(self, other): - return self._binaryop(other, "floordiv") - - def __rfloordiv__(self, other): - return self._binaryop(other, "floordiv", reflect=True) - - def __truediv__(self, other): - return self._binaryop(other, "truediv") - - def __rtruediv__(self, other): - return self._binaryop(other, "truediv", reflect=True) - - def __and__(self, other): - return self._binaryop(other, "and") - - def __or__(self, other): - return self._binaryop(other, "or") - - def __xor__(self, other): - return self._binaryop(other, "xor") - - # Binary rich comparison operations. 
- def __eq__(self, other): - return self._binaryop(other, "eq") - - def __ne__(self, other): - return self._binaryop(other, "ne") - - def __lt__(self, other): - return self._binaryop(other, "lt") - - def __le__(self, other): - return self._binaryop(other, "le") - - def __gt__(self, other): - return self._binaryop(other, "gt") - - def __ge__(self, other): - return self._binaryop(other, "ge") - # Unary logical operators def __neg__(self): return -1 * self @@ -5062,7 +4984,7 @@ def add(self, other, axis, level=None, fill_value=None): if level is not None: raise NotImplementedError("level parameter is not supported yet.") - return self._binaryop(other, "add", fill_value) + return self._binaryop(other, "__add__", fill_value) @annotate("FRAME_RADD", color="green", domain="cudf_python") def radd(self, other, axis, level=None, fill_value=None): @@ -5142,7 +5064,7 @@ def radd(self, other, axis, level=None, fill_value=None): if level is not None: raise NotImplementedError("level parameter is not supported yet.") - return self._binaryop(other, "add", fill_value, reflect=True) + return self._binaryop(other, "__radd__", fill_value) @annotate("FRAME_SUBTRACT", color="green", domain="cudf_python") def subtract(self, other, axis, level=None, fill_value=None): @@ -5223,7 +5145,7 @@ def subtract(self, other, axis, level=None, fill_value=None): if level is not None: raise NotImplementedError("level parameter is not supported yet.") - return self._binaryop(other, "sub", fill_value) + return self._binaryop(other, "__sub__", fill_value) sub = subtract @@ -5309,7 +5231,7 @@ def rsub(self, other, axis, level=None, fill_value=None): if level is not None: raise NotImplementedError("level parameter is not supported yet.") - return self._binaryop(other, "sub", fill_value, reflect=True) + return self._binaryop(other, "__rsub__", fill_value) @annotate("FRAME_MULTIPLY", color="green", domain="cudf_python") def multiply(self, other, axis, level=None, fill_value=None): @@ -5392,7 +5314,7 @@ def multiply(self, other, axis, level=None, fill_value=None): if level is not None: raise NotImplementedError("level parameter is not supported yet.") - return self._binaryop(other, "mul", fill_value) + return self._binaryop(other, "__mul__", fill_value) mul = multiply @@ -5479,7 +5401,7 @@ def rmul(self, other, axis, level=None, fill_value=None): if level is not None: raise NotImplementedError("level parameter is not supported yet.") - return self._binaryop(other, "mul", fill_value, reflect=True) + return self._binaryop(other, "__rmul__", fill_value) @annotate("FRAME_MOD", color="green", domain="cudf_python") def mod(self, other, axis, level=None, fill_value=None): @@ -5550,7 +5472,7 @@ def mod(self, other, axis, level=None, fill_value=None): if level is not None: raise NotImplementedError("level parameter is not supported yet.") - return self._binaryop(other, "mod", fill_value) + return self._binaryop(other, "__mod__", fill_value) @annotate("FRAME_RMOD", color="green", domain="cudf_python") def rmod(self, other, axis, level=None, fill_value=None): @@ -5633,7 +5555,7 @@ def rmod(self, other, axis, level=None, fill_value=None): if level is not None: raise NotImplementedError("level parameter is not supported yet.") - return self._binaryop(other, "mod", fill_value, reflect=True) + return self._binaryop(other, "__rmod__", fill_value) @annotate("FRAME_POW", color="green", domain="cudf_python") def pow(self, other, axis, level=None, fill_value=None): @@ -5713,7 +5635,7 @@ def pow(self, other, axis, level=None, fill_value=None): if level is 
not None: raise NotImplementedError("level parameter is not supported yet.") - return self._binaryop(other, "pow", fill_value) + return self._binaryop(other, "__pow__", fill_value) @annotate("FRAME_RPOW", color="green", domain="cudf_python") def rpow(self, other, axis, level=None, fill_value=None): @@ -5793,7 +5715,7 @@ def rpow(self, other, axis, level=None, fill_value=None): if level is not None: raise NotImplementedError("level parameter is not supported yet.") - return self._binaryop(other, "pow", fill_value, reflect=True) + return self._binaryop(other, "__rpow__", fill_value) @annotate("FRAME_FLOORDIV", color="green", domain="cudf_python") def floordiv(self, other, axis, level=None, fill_value=None): @@ -5873,7 +5795,7 @@ def floordiv(self, other, axis, level=None, fill_value=None): if level is not None: raise NotImplementedError("level parameter is not supported yet.") - return self._binaryop(other, "floordiv", fill_value) + return self._binaryop(other, "__floordiv__", fill_value) @annotate("FRAME_RFLOORDIV", color="green", domain="cudf_python") def rfloordiv(self, other, axis, level=None, fill_value=None): @@ -5970,7 +5892,7 @@ def rfloordiv(self, other, axis, level=None, fill_value=None): if level is not None: raise NotImplementedError("level parameter is not supported yet.") - return self._binaryop(other, "floordiv", fill_value, reflect=True) + return self._binaryop(other, "__rfloordiv__", fill_value) @annotate("FRAME_TRUEDIV", color="green", domain="cudf_python") def truediv(self, other, axis, level=None, fill_value=None): @@ -6055,7 +5977,7 @@ def truediv(self, other, axis, level=None, fill_value=None): if level is not None: raise NotImplementedError("level parameter is not supported yet.") - return self._binaryop(other, "truediv", fill_value) + return self._binaryop(other, "__truediv__", fill_value) # Alias for truediv div = truediv @@ -6149,7 +6071,7 @@ def rtruediv(self, other, axis, level=None, fill_value=None): if level is not None: raise NotImplementedError("level parameter is not supported yet.") - return self._binaryop(other, "truediv", fill_value, reflect=True) + return self._binaryop(other, "__rtruediv__", fill_value) # Alias for rtruediv rdiv = rtruediv @@ -6227,7 +6149,7 @@ def eq(self, other, axis="columns", level=None, fill_value=None): dtype: bool """ return self._binaryop( - other=other, fn="eq", fill_value=fill_value, can_reindex=True + other=other, op="__eq__", fill_value=fill_value, can_reindex=True ) @annotate("FRAME_NE", color="green", domain="cudf_python") @@ -6303,7 +6225,7 @@ def ne(self, other, axis="columns", level=None, fill_value=None): dtype: bool """ # noqa: E501 return self._binaryop( - other=other, fn="ne", fill_value=fill_value, can_reindex=True + other=other, op="__ne__", fill_value=fill_value, can_reindex=True ) @annotate("FRAME_LT", color="green", domain="cudf_python") @@ -6379,7 +6301,7 @@ def lt(self, other, axis="columns", level=None, fill_value=None): dtype: bool """ # noqa: E501 return self._binaryop( - other=other, fn="lt", fill_value=fill_value, can_reindex=True + other=other, op="__lt__", fill_value=fill_value, can_reindex=True ) @annotate("FRAME_LE", color="green", domain="cudf_python") @@ -6455,7 +6377,7 @@ def le(self, other, axis="columns", level=None, fill_value=None): dtype: bool """ # noqa: E501 return self._binaryop( - other=other, fn="le", fill_value=fill_value, can_reindex=True + other=other, op="__le__", fill_value=fill_value, can_reindex=True ) @annotate("FRAME_GT", color="green", domain="cudf_python") @@ -6531,7 +6453,7 @@ 
def gt(self, other, axis="columns", level=None, fill_value=None): dtype: bool """ # noqa: E501 return self._binaryop( - other=other, fn="gt", fill_value=fill_value, can_reindex=True + other=other, op="__gt__", fill_value=fill_value, can_reindex=True ) @annotate("FRAME_GE", color="green", domain="cudf_python") @@ -6607,7 +6529,7 @@ def ge(self, other, axis="columns", level=None, fill_value=None): dtype: bool """ # noqa: E501 return self._binaryop( - other=other, fn="ge", fill_value=fill_value, can_reindex=True + other=other, op="__ge__", fill_value=fill_value, can_reindex=True ) def nunique(self, method: builtins.str = "sort", dropna: bool = True): diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index 5aab834d452..343ba33ece1 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -52,6 +52,7 @@ from cudf.core.column.string import StringMethods as StringMethods from cudf.core.dtypes import IntervalDtype from cudf.core.frame import Frame +from cudf.core.mixins import BinaryOperand from cudf.core.single_column_frame import SingleColumnFrame from cudf.utils.docutils import copy_docstring from cudf.utils.dtypes import find_common_type @@ -122,7 +123,7 @@ def _index_from_columns( return _index_from_data(dict(zip(range(len(columns)), columns)), name=name) -class RangeIndex(BaseIndex): +class RangeIndex(BaseIndex, BinaryOperand): """ Immutable Index implementing a monotonic integer range. @@ -155,6 +156,8 @@ class RangeIndex(BaseIndex): RangeIndex(start=1, stop=10, step=1, name='a') """ + _VALID_BINARY_OPERATIONS = BinaryOperand._SUPPORTED_BINARY_OPERATIONS + _range: range def __init__( @@ -698,43 +701,16 @@ def _apply_boolean_mask(self, boolean_mask): [self._values.apply_boolean_mask(boolean_mask)], [self.name] ) + def _binaryop(self, other, op: str): + return self._as_int64()._binaryop(other, op=op) + # Patch in all binops and unary ops, which bypass __getattr__ on the instance # and prevent the above overload from working. -for binop in ( - "__add__", - "__radd__", - "__sub__", - "__rsub__", - "__mod__", - "__rmod__", - "__pow__", - "__rpow__", - "__floordiv__", - "__rfloordiv__", - "__truediv__", - "__rtruediv__", - "__and__", - "__or__", - "__xor__", - "__eq__", - "__ne__", - "__lt__", - "__le__", - "__gt__", - "__ge__", -): - setattr( - RangeIndex, - binop, - lambda self, other, op=binop: getattr(self._as_int64(), op)(other), - ) - - for unaop in ("__neg__", "__pos__", "__abs__"): setattr( RangeIndex, - binop, + unaop, lambda self, op=unaop: getattr(self._as_int64(), op)(), ) @@ -814,19 +790,15 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): return NotImplemented def _binaryop( - self, - other: T, - fn: str, - fill_value: Any = None, - reflect: bool = False, - *args, - **kwargs, + self, other: T, op: str, fill_value: Any = None, *args, **kwargs, ) -> SingleColumnFrame: - # Specialize binops to generate the appropriate output index type. + reflect = self._is_reflected_op(op) + if reflect: + op = op[:2] + op[3:] operands = self._make_operands_for_binop(other, fill_value, reflect) if operands is NotImplemented: return NotImplemented - ret = _index_from_data(self._colwise_binop(operands, fn)) + ret = _index_from_data(self._colwise_binop(operands, op)) # pandas returns numpy arrays when the outputs are boolean. 
We # explicitly _do not_ use isinstance here: we want only boolean diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py index 3ae0a838873..331457d17ae 100644 --- a/python/cudf/cudf/core/indexed_frame.py +++ b/python/cudf/cudf/core/indexed_frame.py @@ -1697,21 +1697,23 @@ def last(self, offset): def _binaryop( self, other: Any, - fn: str, + op: str, fill_value: Any = None, - reflect: bool = False, can_reindex: bool = False, *args, **kwargs, ): + reflect = self._is_reflected_op(op) + if reflect: + op = op[:2] + op[3:] operands, out_index = self._make_operands_and_index_for_binop( - other, fn, fill_value, reflect, can_reindex + other, op, fill_value, reflect, can_reindex ) if operands is NotImplemented: return NotImplemented return self._from_data( - ColumnAccessor(type(self)._colwise_binop(operands, fn)), + ColumnAccessor(type(self)._colwise_binop(operands, op)), index=out_index, ) diff --git a/python/cudf/cudf/core/mixins/__init__.py b/python/cudf/cudf/core/mixins/__init__.py index 507b3b18ac2..8306f3f11b3 100644 --- a/python/cudf/cudf/core/mixins/__init__.py +++ b/python/cudf/cudf/core/mixins/__init__.py @@ -1,6 +1,7 @@ # Copyright (c) 2022, NVIDIA CORPORATION. +from .binops import BinaryOperand from .reductions import Reducible from .scans import Scannable -__all__ = ["Reducible", "Scannable"] +__all__ = ["BinaryOperand", "Reducible", "Scannable"] diff --git a/python/cudf/cudf/core/mixins/binops.py b/python/cudf/cudf/core/mixins/binops.py new file mode 100644 index 00000000000..773b47b62b2 --- /dev/null +++ b/python/cudf/cudf/core/mixins/binops.py @@ -0,0 +1,56 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. + +from .mixin_factory import _create_delegating_mixin + +BinaryOperand = _create_delegating_mixin( + "BinaryOperand", + "Mixin encapsulating binary operations.", + "BINARY_OPERATION", + "_binaryop", + { + # Numeric operations. + "__add__", + "__sub__", + "__mul__", + "__matmul__", + "__truediv__", + "__floordiv__", + "__mod__", + # "__divmod__", # Not yet implemented + "__pow__", + # "__lshift__", # Not yet implemented + # "__rshift__", # Not yet implemented + "__and__", + "__xor__", + "__or__", + # Reflected numeric operations. + "__radd__", + "__rsub__", + "__rmul__", + "__rmatmul__", + "__rtruediv__", + "__rfloordiv__", + "__rmod__", + # "__rdivmod__", # Not yet implemented + "__rpow__", + # "__rlshift__", # Not yet implemented + # "__rrshift__", # Not yet implemented + "__rand__", + "__rxor__", + "__ror__", + # Rich comparison operations. + "__lt__", + "__le__", + "__eq__", + "__ne__", + "__gt__", + "__ge__", + }, +) + + +def _is_reflected_op(op): + return op[2] == "r" and op != "__rshift__" + + +BinaryOperand._is_reflected_op = staticmethod(_is_reflected_op) diff --git a/python/cudf/cudf/core/mixins/binops.pyi b/python/cudf/cudf/core/mixins/binops.pyi new file mode 100644 index 00000000000..45093cd04d4 --- /dev/null +++ b/python/cudf/cudf/core/mixins/binops.pyi @@ -0,0 +1,88 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. + +from typing import Set + +class BinaryOperand: + _SUPPORTED_BINARY_OPERATIONS: Set + + def __add__(self, other): + ... + + def __sub__(self, other): + ... + + def __mul__(self, other): + ... + + def __truediv__(self, other): + ... + + def __floordiv__(self, other): + ... + + def __mod__(self, other): + ... + + def __pow__(self, other): + ... + + def __and__(self, other): + ... + + def __xor__(self, other): + ... + + def __or__(self, other): + ... + + def __radd__(self, other): + ... + + def __rsub__(self, other): + ... 
+ + def __rmul__(self, other): + ... + + def __rtruediv__(self, other): + ... + + def __rfloordiv__(self, other): + ... + + def __rmod__(self, other): + ... + + def __rpow__(self, other): + ... + + def __rand__(self, other): + ... + + def __rxor__(self, other): + ... + + def __ror__(self, other): + ... + + def __lt__(self, other): + ... + + def __le__(self, other): + ... + + def __eq__(self, other): + ... + + def __ne__(self, other): + ... + + def __gt__(self, other): + ... + + def __ge__(self, other): + ... + + @staticmethod + def _is_reflected_op(op) -> bool: + ... diff --git a/python/cudf/cudf/core/mixins/mixin_factory.py b/python/cudf/cudf/core/mixins/mixin_factory.py index ecb18f61830..7bbb299d643 100644 --- a/python/cudf/cudf/core/mixins/mixin_factory.py +++ b/python/cudf/cudf/core/mixins/mixin_factory.py @@ -86,14 +86,18 @@ def _should_define_operation(cls, operation, base_operation_name): # At this point we know that the class has the operation defined but it # also overrides the base operation. Since this function is called before # the operation is defined on the current class, we know that it inherited - # the operation from a parent. We therefore have two possibilities: + # the operation from a parent. We therefore have three possibilities: # 1. A parent class manually defined the operation. That override takes # precedence even if the current class defined the base operation. # 2. A parent class has an auto-generated operation, i.e. it is of type # Operation and was created by OperationMixin.__init_subclass__. The # current class must override it so that its base operation is used # rather than the parent's base operation. + # 3. The method is defined for all classes, i.e. it is a method of object. for base_cls in cls.__mro__: + # We always override methods defined for object. + if base_cls is object: + return True # The first attribute in the MRO is the one that will be used. if operation in base_cls.__dict__: return isinstance(base_cls.__dict__[operation], Operation) @@ -216,6 +220,7 @@ def __init_subclass__(cls): # Only add the valid set of operations for a particular class. valid_operations = set() for base_cls in cls.__mro__: + # Check for sentinel indicating that all operations are valid. valid_operations |= getattr(base_cls, validity_attr, set()) invalid_operations = valid_operations - supported_operations @@ -251,9 +256,8 @@ def _operation(self, op: str, *args, **kwargs): ) setattr(OperationMixin, base_operation_name, _operation) - # This attribute is set in case lookup is convenient at a later point, but - # it is not strictly necessary since `supported_operations` is part of the - # closure associated with the class's creation. + # Making this attribute available makes it easy for subclasses to indicate + # that all supported operations for this mixin are valid. setattr(OperationMixin, supported_attr, supported_operations) return OperationMixin diff --git a/python/cudf/cudf/core/mixins/reductions.pyi b/python/cudf/cudf/core/mixins/reductions.pyi index 600f30e9372..3769b7c360e 100644 --- a/python/cudf/cudf/core/mixins/reductions.pyi +++ b/python/cudf/cudf/core/mixins/reductions.pyi @@ -1,8 +1,10 @@ # Copyright (c) 2022, NVIDIA CORPORATION. -from __future__ import annotations +from typing import Set class Reducible: + _SUPPORTED_REDUCTIONS: Set + def sum(self): ... 
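
Note on the mixin changes above: the new BinaryOperand mixin generates the full set of binary dunder methods and routes each of them through a single _binaryop(self, other, op) hook, with _is_reflected_op letting implementations strip the reflection marker (op[:2] + op[3:]) before dispatching. A minimal, self-contained sketch of that delegation pattern follows; the names BinaryOperandSketch, MyNumber, and _make_op are illustrative only and this is not cudf's actual mixin_factory implementation.

# Simplified sketch of the delegation pattern used by the BinaryOperand mixin.
_BINARY_OPS = {"__add__", "__radd__", "__sub__", "__rsub__"}


def _make_op(op):
    # Each generated dunder forwards its own operator name to _binaryop.
    def _dunder(self, other):
        return self._binaryop(other, op)

    _dunder.__name__ = op
    return _dunder


class BinaryOperandSketch:
    _SUPPORTED_BINARY_OPERATIONS = _BINARY_OPS

    @staticmethod
    def _is_reflected_op(op):
        # Same rule as the patch: "__radd__" is reflected, "__rshift__" is not.
        return op[2] == "r" and op != "__rshift__"


for _op in _BINARY_OPS:
    setattr(BinaryOperandSketch, _op, _make_op(_op))


class MyNumber(BinaryOperandSketch):
    _VALID_BINARY_OPERATIONS = BinaryOperandSketch._SUPPORTED_BINARY_OPERATIONS

    def __init__(self, value):
        self.value = value

    def _binaryop(self, other, op):
        # Strip the reflection marker before dispatching, mirroring the
        # patched Frame._binaryop and IndexedFrame._binaryop.
        if self._is_reflected_op(op):
            op = op[:2] + op[3:]
            left, right = other, self.value
        else:
            left, right = self.value, other
        if op == "__add__":
            return MyNumber(left + right)
        if op == "__sub__":
            return MyNumber(left - right)
        return NotImplemented


print((MyNumber(3) + 4).value)   # 7, via the generated __add__
print((10 - MyNumber(3)).value)  # 7, via the generated __rsub__

The real mixin additionally filters the generated operators through each subclass's _VALID_BINARY_OPERATIONS, which is why Frame, RangeIndex, and Scalar in this patch set that attribute to BinaryOperand._SUPPORTED_BINARY_OPERATIONS.
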
diff --git a/python/cudf/cudf/core/scalar.py b/python/cudf/cudf/core/scalar.py index 134b94bf0f2..1c81803ed98 100644 --- a/python/cudf/cudf/core/scalar.py +++ b/python/cudf/cudf/core/scalar.py @@ -10,6 +10,7 @@ from cudf.core.column.column import ColumnBase from cudf.core.dtypes import ListDtype, StructDtype from cudf.core.index import BaseIndex +from cudf.core.mixins import BinaryOperand from cudf.core.series import Series from cudf.utils.dtypes import ( get_allowed_combinations_for_operator, @@ -17,7 +18,7 @@ ) -class Scalar: +class Scalar(BinaryOperand): """ A GPU-backed scalar object with NumPy scalar like properties May be used in binary operations against other scalars, cuDF @@ -57,6 +58,8 @@ class Scalar: The data type """ + _VALID_BINARY_OPERATIONS = BinaryOperand._SUPPORTED_BINARY_OPERATIONS + def __init__(self, value, dtype=None): self._host_value = None @@ -211,69 +214,8 @@ def __float__(self): def __bool__(self): return bool(self.value) - # Scalar Binary Operations - def __add__(self, other): - return self._scalar_binop(other, "__add__") - - def __radd__(self, other): - return self._scalar_binop(other, "__radd__") - - def __sub__(self, other): - return self._scalar_binop(other, "__sub__") - - def __rsub__(self, other): - return self._scalar_binop(other, "__rsub__") - - def __mul__(self, other): - return self._scalar_binop(other, "__mul__") - - def __rmul__(self, other): - return self._scalar_binop(other, "__rmul__") - - def __truediv__(self, other): - return self._scalar_binop(other, "__truediv__") - - def __floordiv__(self, other): - return self._scalar_binop(other, "__floordiv__") - - def __rtruediv__(self, other): - return self._scalar_binop(other, "__rtruediv__") - - def __mod__(self, other): - return self._scalar_binop(other, "__mod__") - - def __divmod__(self, other): - return self._scalar_binop(other, "__divmod__") - - def __and__(self, other): - return self._scalar_binop(other, "__and__") - - def __xor__(self, other): - return self._scalar_binop(other, "__or__") - - def __pow__(self, other): - return self._scalar_binop(other, "__pow__") - - def __gt__(self, other): - return self._scalar_binop(other, "__gt__") - - def __lt__(self, other): - return self._scalar_binop(other, "__lt__") - - def __ge__(self, other): - return self._scalar_binop(other, "__ge__") - - def __le__(self, other): - return self._scalar_binop(other, "__le__") - - def __eq__(self, other): - return self._scalar_binop(other, "__eq__") - - def __ne__(self, other): - return self._scalar_binop(other, "__ne__") - def __round__(self, n): - return self._scalar_binop(n, "__round__") + return self._binaryop(n, "__round__") # Scalar Unary Operations def __abs__(self): @@ -330,7 +272,7 @@ def _binop_result_dtype_or_error(self, other, op): return cudf.dtype(out_dtype) - def _scalar_binop(self, other, op): + def _binaryop(self, other, op: str): if isinstance(other, (ColumnBase, Series, BaseIndex, np.ndarray)): # dispatch to column implementation return NotImplemented diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index ec87fcdb066..fffce27c89a 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -1245,21 +1245,21 @@ def logical_and(self, other): "Series.logical_and is deprecated and will be removed.", FutureWarning, ) - return self._binaryop(other, "l_and").astype(np.bool_) + return self._binaryop(other, "__l_and__").astype(np.bool_) def remainder(self, other): warnings.warn( "Series.remainder is deprecated and will be removed.", FutureWarning, ) - 
return self._binaryop(other, "mod") + return self._binaryop(other, "__mod__") def logical_or(self, other): warnings.warn( "Series.logical_or is deprecated and will be removed.", FutureWarning, ) - return self._binaryop(other, "l_or").astype(np.bool_) + return self._binaryop(other, "__l_or__").astype(np.bool_) def logical_not(self): warnings.warn( diff --git a/python/cudf/cudf/io/text.py b/python/cudf/cudf/io/text.py index 705645b8349..04809f8fd59 100644 --- a/python/cudf/cudf/io/text.py +++ b/python/cudf/cudf/io/text.py @@ -1,4 +1,4 @@ -# Copyright (c) 2018-2021, NVIDIA CORPORATION. +# Copyright (c) 2018-2022, NVIDIA CORPORATION. from io import BytesIO, StringIO @@ -12,7 +12,7 @@ @annotate("READ_TEXT", color="purple", domain="cudf_python") @ioutils.doc_read_text() def read_text( - filepath_or_buffer, delimiter=None, **kwargs, + filepath_or_buffer, delimiter=None, byte_range=None, **kwargs, ): """{docstring}""" @@ -24,5 +24,7 @@ def read_text( ) return cudf.Series._from_data( - libtext.read_text(filepath_or_buffer, delimiter=delimiter,) + libtext.read_text( + filepath_or_buffer, delimiter=delimiter, byte_range=byte_range + ) ) diff --git a/python/cudf/cudf/tests/test_csv.py b/python/cudf/cudf/tests/test_csv.py index f3d69e1745e..6176184b670 100644 --- a/python/cudf/cudf/tests/test_csv.py +++ b/python/cudf/cudf/tests/test_csv.py @@ -1315,7 +1315,7 @@ def test_csv_reader_aligned_byte_range(tmpdir): [(None, None), ("int", "hex"), ("int32", "hex32"), ("int64", "hex64")], ) def test_csv_reader_hexadecimals(pdf_dtype, gdf_dtype): - lines = ["0x0", "-0x1000", "0xfedcba", "0xABCDEF", "0xaBcDeF", "9512c20b"] + lines = ["0x0", "-0x1000", "0xfedcba", "0xABCDEF", "0xaBcDeF"] values = [int(hex_int, 16) for hex_int in lines] buffer = "\n".join(lines) @@ -1334,6 +1334,35 @@ def test_csv_reader_hexadecimals(pdf_dtype, gdf_dtype): assert_eq(pdf, gdf) +@pytest.mark.parametrize( + "np_dtype, gdf_dtype", + [("int", "hex"), ("int32", "hex32"), ("int64", "hex64")], +) +def test_csv_reader_hexadecimal_overflow(np_dtype, gdf_dtype): + # This tests values which cause an overflow warning that will become an + # error in pandas. NumPy wraps the overflow silently up to the bounds of a + # signed int64. + lines = [ + "0x0", + "-0x1000", + "0xfedcba", + "0xABCDEF", + "0xaBcDeF", + "0x9512c20b", + "0x7fffffff", + "0x7fffffffffffffff", + "-0x8000000000000000", + ] + values = [int(hex_int, 16) for hex_int in lines] + buffer = "\n".join(lines) + + gdf = read_csv(StringIO(buffer), dtype=[gdf_dtype], names=["hex_int"]) + + expected = np.array(values, dtype=np_dtype) + actual = gdf["hex_int"].to_numpy() + np.testing.assert_array_equal(expected, actual) + + @pytest.mark.parametrize("quoting", [0, 1, 2, 3]) def test_csv_reader_pd_consistent_quotes(quoting): names = ["text"] diff --git a/python/cudf/cudf/tests/test_text.py b/python/cudf/cudf/tests/test_text.py index 5ff66fc750f..fb6505f5f92 100644 --- a/python/cudf/cudf/tests/test_text.py +++ b/python/cudf/cudf/tests/test_text.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019, NVIDIA CORPORATION. +# Copyright (c) 2019-2022, NVIDIA CORPORATION. import numpy as np import pytest @@ -778,3 +778,54 @@ def test_read_text(datadir): actual = cudf.read_text(chess_file, delimiter=delimiter) assert_eq(expected, actual) + + +def test_read_text_byte_range(datadir): + chess_file = str(datadir) + "/chess.pgn" + delimiter = "1." 
+ + with open(chess_file, "r") as f: + data = f.read() + content = data.split(delimiter) + + # Since Python split removes the delimiter and read_text does + # not we need to add it back to the 'content' + expected = cudf.Series( + [ + c + delimiter if i < (len(content) - 1) else c + for i, c in enumerate(content) + ] + ) + + byte_range_size = (len(data) // 3) + (len(data) % 3 != 0) + + actual_0 = cudf.read_text( + chess_file, + delimiter=delimiter, + byte_range=[byte_range_size * 0, byte_range_size], + ) + actual_1 = cudf.read_text( + chess_file, + delimiter=delimiter, + byte_range=[byte_range_size * 1, byte_range_size], + ) + actual_2 = cudf.read_text( + chess_file, + delimiter=delimiter, + byte_range=[byte_range_size * 2, byte_range_size], + ) + + actual = cudf.concat([actual_0, actual_1, actual_2], ignore_index=True) + + assert_eq(expected, actual) + + +def test_read_text_byte_range_large(datadir): + content = str(("\n" if x % 5 == 0 else "x") for x in range(0, 300000000)) + delimiter = "1." + temp_file = str(datadir) + "/temp.txt" + + with open(temp_file, "w") as f: + f.write(content) + + cudf.read_text(temp_file, delimiter=delimiter) diff --git a/python/cudf/cudf/utils/utils.py b/python/cudf/cudf/utils/utils.py index 4dadfede866..315da4d8dd6 100644 --- a/python/cudf/cudf/utils/utils.py +++ b/python/cudf/cudf/utils/utils.py @@ -24,12 +24,6 @@ _EQUALITY_OPS = { - "eq", - "ne", - "lt", - "gt", - "le", - "ge", "__eq__", "__ne__", "__lt__",
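
End-to-end, the io/text changes above plumb libcudf's byte_range_info through the Cython bindings into cudf.read_text, so a large delimited file can be read in non-overlapping byte ranges and the pieces concatenated, exactly as test_read_text_byte_range does with chess.pgn. A rough usage sketch, assuming a cudf build that contains this patch and a hypothetical /tmp/records.txt written on the fly:

import cudf

# Hypothetical sample file; any delimited text file works the same way.
path = "/tmp/records.txt"
data = "one;two;three;four;five;"
with open(path, "w") as f:
    f.write(data)

chunk = -(-len(data) // 3)  # ceiling division, so three ranges cover the file

# byte_range is an [offset, size] pair in bytes. Each row is returned by
# exactly one range, so concatenating the per-range results recovers every
# row, which is the property the new test asserts against chess.pgn.
parts = [
    cudf.read_text(path, delimiter=";", byte_range=[i * chunk, chunk])
    for i in range(3)
]
print(cudf.concat(parts, ignore_index=True))
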