From a6985de64584a8278f82790c8f55f285c79a7f93 Mon Sep 17 00:00:00 2001 From: "Robert (Bobby) Evans" Date: Fri, 19 May 2023 16:04:28 -0500 Subject: [PATCH] Upmerge to the latest CUDF and fix compile errors (#1154) Signed-off-by: Robert (Bobby) Evans --- src/main/cpp/src/cast_string.cu | 19 ++++++++++++++----- src/main/cpp/src/map_utils.cu | 5 ++++- src/main/cpp/src/row_conversion.cu | 7 +++++-- src/main/cpp/tests/cast_string.cpp | 9 ++++++--- thirdparty/cudf | 2 +- 5 files changed, 30 insertions(+), 12 deletions(-) diff --git a/src/main/cpp/src/cast_string.cu b/src/main/cpp/src/cast_string.cu index a6f0f0d0de..2cfcc62630 100644 --- a/src/main/cpp/src/cast_string.cu +++ b/src/main/cpp/src/cast_string.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -21,6 +21,7 @@ #include #include +#include #include #include #include @@ -652,7 +653,8 @@ struct string_to_integer_impl { rmm::mr::device_memory_resource* mr) { if (string_col.size() == 0) { - return std::make_unique(data_type{type_to_id()}, 0, rmm::device_buffer{}); + return std::make_unique(data_type{type_to_id()}, 0, rmm::device_buffer{}, + rmm::device_buffer{}, 0); } rmm::device_uvector data(string_col.size(), stream, mr); @@ -672,8 +674,10 @@ struct string_to_integer_impl { ansi_mode, strip); + auto null_count = cudf::detail::null_count(null_mask.data(), 0, string_col.size(), stream); + auto col = std::make_unique( - data_type{type_to_id()}, string_col.size(), data.release(), null_mask.release()); + data_type{type_to_id()}, string_col.size(), data.release(), null_mask.release(), null_count); if (ansi_mode) { validate_ansi_column(col->view(), string_col, stream); } @@ -737,8 +741,11 @@ struct string_to_decimal_impl { precision, strip); + auto null_count = cudf::detail::null_count(null_mask.data(), 0, string_col.size(), stream); + auto col = - std::make_unique(dtype, string_col.size(), data.release(), null_mask.release()); + std::make_unique(dtype, string_col.size(), data.release(), + null_mask.release(), null_count); if (ansi_mode) { validate_ansi_column(col->view(), string_col, stream); } @@ -818,7 +825,9 @@ std::unique_ptr string_to_decimal(int32_t precision, CUDF_FAIL("Unable to support decimal with precision " + std::to_string(precision)); }(); - if (string_col.size() == 0) { return std::make_unique(dtype, 0, rmm::device_buffer{}); } + if (string_col.size() == 0) { + return std::make_unique(dtype, 0, rmm::device_buffer{}, rmm::device_buffer{}, 0); + } return type_dispatcher( dtype, detail::string_to_decimal_impl{}, dtype, precision, string_col, ansi_mode, strip, stream, mr); diff --git a/src/main/cpp/src/map_utils.cu b/src/main/cpp/src/map_utils.cu index b86f6fb5f3..d0367206ae 100644 --- a/src/main/cpp/src/map_utils.cu +++ b/src/main/cpp/src/map_utils.cu @@ -628,8 +628,11 @@ std::unique_ptr from_json(cudf::column_view const &input, auto structs_col = cudf::make_structs_column(num_pairs, std::move(out_keys_vals), 0, rmm::device_buffer{}, stream, mr); + auto offsets = std::make_unique(std::move(list_offsets), + rmm::device_buffer{}, 0); + return cudf::make_lists_column( - input.size(), std::make_unique(std::move(list_offsets)), std::move(structs_col), + input.size(), std::move(offsets), std::move(structs_col), input.null_count(), cudf::detail::copy_bitmask(input, stream, mr), stream, mr); } diff --git a/src/main/cpp/src/row_conversion.cu b/src/main/cpp/src/row_conversion.cu index 88555d22c6..3fe7cf3c6c 100644 --- a/src/main/cpp/src/row_conversion.cu +++ b/src/main/cpp/src/row_conversion.cu @@ -1886,10 +1886,13 @@ std::vector> convert_to_rows( auto const offset_count = batch_info.row_batches[batch].row_offsets.size(); auto offsets = std::make_unique( data_type{type_id::INT32}, (size_type)offset_count, - batch_info.row_batches[batch].row_offsets.release()); + batch_info.row_batches[batch].row_offsets.release(), + rmm::device_buffer{}, 0); auto data = std::make_unique(data_type{type_id::INT8}, batch_info.row_batches[batch].num_bytes, - std::move(output_buffers[batch])); + std::move(output_buffers[batch]), + rmm::device_buffer{}, + 0); return make_lists_column( batch_info.row_batches[batch].row_count, std::move(offsets), std::move(data), diff --git a/src/main/cpp/tests/cast_string.cpp b/src/main/cpp/tests/cast_string.cpp index 1970b443da..598d570611 100644 --- a/src/main/cpp/tests/cast_string.cpp +++ b/src/main/cpp/tests/cast_string.cpp @@ -240,7 +240,8 @@ TYPED_TEST(StringToIntegerTests, Overflow) TYPED_TEST(StringToIntegerTests, Empty) { - auto empty = std::make_unique(data_type{type_id::STRING}, 0, rmm::device_buffer{}); + auto empty = std::make_unique(data_type{type_id::STRING}, 0, rmm::device_buffer{}, + rmm::device_buffer{}, 0); auto result = spark_rapids_jni::string_to_integer(data_type{type_to_id()}, strings_column_view{empty->view()}, @@ -541,7 +542,8 @@ TEST_F(StringToDecimalTests, Edges) TEST_F(StringToDecimalTests, Empty) { - auto empty = std::make_unique(data_type{type_id::STRING}, 0, rmm::device_buffer{}); + auto empty = std::make_unique(data_type{type_id::STRING}, 0, rmm::device_buffer{}, + rmm::device_buffer{}, 0); auto const result = spark_rapids_jni::string_to_decimal( 8, 2, strings_column_view{empty->view()}, false, true, cudf::get_default_stream()); @@ -696,7 +698,8 @@ TYPED_TEST(StringToFloatTests, TrickyValues) TYPED_TEST(StringToFloatTests, Empty) { - auto empty = std::make_unique(data_type{type_id::STRING}, 0, rmm::device_buffer{}); + auto empty = std::make_unique(data_type{type_id::STRING}, 0, rmm::device_buffer{}, + rmm::device_buffer{}, 0); auto const result = spark_rapids_jni::string_to_float(data_type{type_to_id()}, strings_column_view{empty->view()}, diff --git a/thirdparty/cudf b/thirdparty/cudf index c601b83f3e..72c067726c 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit c601b83f3e03b3a1ef489daafb0dd520b3fdbe29 +Subproject commit 72c067726ccfb6e87033d34ab07b4dc79b5e4a3e