From 8497e591968356c8920da6b8ca0341dc98e585c7 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 13 May 2024 09:53:36 -0700 Subject: [PATCH 1/3] Fix string functors Signed-off-by: Nghia Truong --- src/main/cpp/src/cast_decimal_to_string.cu | 17 +++++++++-------- src/main/cpp/src/cast_float_to_string.cu | 11 ++++++----- src/main/cpp/src/format_float.cu | 9 +++++---- src/main/cpp/src/map_utils.cu | 21 ++++++++------------- 4 files changed, 28 insertions(+), 30 deletions(-) diff --git a/src/main/cpp/src/cast_decimal_to_string.cu b/src/main/cpp/src/cast_decimal_to_string.cu index 9d0e27ed59..35f0993f3b 100644 --- a/src/main/cpp/src/cast_decimal_to_string.cu +++ b/src/main/cpp/src/cast_decimal_to_string.cu @@ -52,8 +52,9 @@ namespace { template struct decimal_to_non_ansi_string_fn { column_device_view d_decimals; - size_type* d_offsets{}; - char* d_chars{}; + cudf::size_type* d_sizes; + char* d_chars; + cudf::detail::input_offsetalator d_offsets; /** * @brief Calculates the size of the string required to convert the element, in base-10 format. @@ -87,9 +88,9 @@ struct decimal_to_non_ansi_string_fn { } else { // positive scale or adjusted exponent < -6 means scientific notation auto const extra_digits = abs_value_digits > 1 ? 3 : 2; - return static_cast(value < 0) + // sign if negative - abs_value_digits + // number of digits - extra_digits + // decimal point if exists, E, +/- + return static_cast(value < 0) + // sign if negative + abs_value_digits + // number of digits + extra_digits + // decimal point if exists, E, +/- strings::detail::count_digits( numeric::detail::abs(adjusted_exponent)); // exponent portion } @@ -127,7 +128,7 @@ struct decimal_to_non_ansi_string_fn { d_buffer += strings::detail::integer_to_string(abs_value / exp_ten, d_buffer); // add the integer part if (scale != 0) { - *d_buffer++ = '.'; // add decimal point + *d_buffer++ = '.'; // add decimal point thrust::generate_n(thrust::seq, d_buffer, num_zeros, []() { return '0'; }); // add zeros d_buffer += num_zeros; @@ -162,13 +163,13 @@ struct decimal_to_non_ansi_string_fn { __device__ void operator()(size_type idx) { if (d_decimals.is_null(idx)) { - if (d_chars == nullptr) { d_offsets[idx] = 0; } + if (d_chars == nullptr) { d_sizes[idx] = 0; } return; } if (d_chars != nullptr) { decimal_to_non_ansi_string(idx); } else { - d_offsets[idx] = compute_output_size(d_decimals.element(idx)); + d_sizes[idx] = compute_output_size(d_decimals.element(idx)); } } }; diff --git a/src/main/cpp/src/cast_float_to_string.cu b/src/main/cpp/src/cast_float_to_string.cu index 78cedbbf64..b294ca6f1b 100644 --- a/src/main/cpp/src/cast_float_to_string.cu +++ b/src/main/cpp/src/cast_float_to_string.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -34,8 +34,9 @@ namespace { template struct float_to_string_fn { cudf::column_device_view d_floats; - cudf::size_type* d_offsets; + cudf::size_type* d_sizes; char* d_chars; + cudf::detail::input_offsetalator d_offsets; __device__ cudf::size_type compute_output_size(cudf::size_type idx) const { @@ -56,13 +57,13 @@ struct float_to_string_fn { __device__ void operator()(cudf::size_type idx) const { if (d_floats.is_null(idx)) { - if (d_chars == nullptr) { d_offsets[idx] = 0; } + if (d_chars == nullptr) { d_sizes[idx] = 0; } return; } if (d_chars != nullptr) { float_to_string(idx); } else { - d_offsets[idx] = compute_output_size(idx); + d_sizes[idx] = compute_output_size(idx); } } }; @@ -124,4 +125,4 @@ std::unique_ptr float_to_string(cudf::column_view const& floats, return detail::float_to_string(floats, stream, mr); } -} // namespace spark_rapids_jni \ No newline at end of file +} // namespace spark_rapids_jni diff --git a/src/main/cpp/src/format_float.cu b/src/main/cpp/src/format_float.cu index 1d537595d7..bc3c85bbcc 100644 --- a/src/main/cpp/src/format_float.cu +++ b/src/main/cpp/src/format_float.cu @@ -35,8 +35,9 @@ template struct format_float_fn { cudf::column_device_view d_floats; int digits; - cudf::size_type* d_offsets; + cudf::size_type* d_sizes; char* d_chars; + cudf::detail::input_offsetalator d_offsets; __device__ cudf::size_type compute_output_size(FloatType const value) const { @@ -56,13 +57,13 @@ struct format_float_fn { __device__ void operator()(cudf::size_type const idx) const { if (d_floats.is_null(idx)) { - if (d_chars == nullptr) { d_offsets[idx] = 0; } + if (d_chars == nullptr) { d_sizes[idx] = 0; } return; } if (d_chars != nullptr) { format_float(idx); } else { - d_offsets[idx] = compute_output_size(d_floats.element(idx)); + d_sizes[idx] = compute_output_size(d_floats.element(idx)); } } }; @@ -128,4 +129,4 @@ std::unique_ptr format_float(cudf::column_view const& floats, return detail::format_float(floats, digits, stream, mr); } -} // namespace spark_rapids_jni \ No newline at end of file +} // namespace spark_rapids_jni diff --git a/src/main/cpp/src/map_utils.cu b/src/main/cpp/src/map_utils.cu index 6ae54f4fe9..bd02859187 100644 --- a/src/main/cpp/src/map_utils.cu +++ b/src/main/cpp/src/map_utils.cu @@ -16,10 +16,6 @@ #include "map_utils_debug.cuh" -// -#include - -// #include #include #include @@ -31,11 +27,11 @@ #include #include -// #include #include -// +#include +#include #include #include #include @@ -51,9 +47,7 @@ #include #include -// -#include -#include +#include namespace spark_rapids_jni { @@ -77,7 +71,7 @@ rmm::device_uvector unify_json_strings(cudf::column_view const& input, auto const input_scv = cudf::strings_column_view{input}; auto const chars_size = input_scv.chars_size(stream); auto const output_size = - 2l + // two extra bracket characters '[' and ']' + 2l + // two extra bracket characters '[' and ']' static_cast(chars_size) + static_cast(input.size() - 1) + // append `,` character between input rows static_cast(input.null_count()) * 2l; // replace null with "{}" @@ -520,8 +514,9 @@ struct substring_fn { cudf::device_span const d_string; cudf::device_span const> const d_ranges; - cudf::size_type* d_offsets{}; - char* d_chars{}; + cudf::size_type* d_sizes; + char* d_chars; + cudf::detail::input_offsetalator d_offsets; __device__ void operator()(cudf::size_type const idx) { @@ -530,7 +525,7 @@ struct substring_fn { if (d_chars) { memcpy(d_chars + d_offsets[idx], d_string.data() + range.first, size); } else { - d_offsets[idx] = size; + d_sizes[idx] = size; } } }; From 71bfd6c77bd31056e92278ff72f26be873faf523 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 13 May 2024 10:00:39 -0700 Subject: [PATCH 2/3] Fix style Signed-off-by: Nghia Truong --- src/main/cpp/src/cast_decimal_to_string.cu | 8 ++++---- src/main/cpp/src/map_utils.cu | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/main/cpp/src/cast_decimal_to_string.cu b/src/main/cpp/src/cast_decimal_to_string.cu index 35f0993f3b..91b155dae4 100644 --- a/src/main/cpp/src/cast_decimal_to_string.cu +++ b/src/main/cpp/src/cast_decimal_to_string.cu @@ -88,9 +88,9 @@ struct decimal_to_non_ansi_string_fn { } else { // positive scale or adjusted exponent < -6 means scientific notation auto const extra_digits = abs_value_digits > 1 ? 3 : 2; - return static_cast(value < 0) + // sign if negative - abs_value_digits + // number of digits - extra_digits + // decimal point if exists, E, +/- + return static_cast(value < 0) + // sign if negative + abs_value_digits + // number of digits + extra_digits + // decimal point if exists, E, +/- strings::detail::count_digits( numeric::detail::abs(adjusted_exponent)); // exponent portion } @@ -128,7 +128,7 @@ struct decimal_to_non_ansi_string_fn { d_buffer += strings::detail::integer_to_string(abs_value / exp_ten, d_buffer); // add the integer part if (scale != 0) { - *d_buffer++ = '.'; // add decimal point + *d_buffer++ = '.'; // add decimal point thrust::generate_n(thrust::seq, d_buffer, num_zeros, []() { return '0'; }); // add zeros d_buffer += num_zeros; diff --git a/src/main/cpp/src/map_utils.cu b/src/main/cpp/src/map_utils.cu index bd02859187..529ed1d90c 100644 --- a/src/main/cpp/src/map_utils.cu +++ b/src/main/cpp/src/map_utils.cu @@ -71,7 +71,7 @@ rmm::device_uvector unify_json_strings(cudf::column_view const& input, auto const input_scv = cudf::strings_column_view{input}; auto const chars_size = input_scv.chars_size(stream); auto const output_size = - 2l + // two extra bracket characters '[' and ']' + 2l + // two extra bracket characters '[' and ']' static_cast(chars_size) + static_cast(input.size() - 1) + // append `,` character between input rows static_cast(input.null_count()) * 2l; // replace null with "{}" From e0fbc9cb18b2572ef66c719996fdce0751c6d49b Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 13 May 2024 11:15:45 -0700 Subject: [PATCH 3/3] Include changes from cudf Signed-off-by: Nghia Truong --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index a4cd1d8776..13f028f01a 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit a4cd1d877631e4554c53b57202564398b758324c +Subproject commit 13f028f01ad043b0d24f3e4a28f4267c02806390