Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add string conversion to/from decimal values libcudf APIs #7364

Merged
merged 25 commits into from
Feb 19, 2021
Merged
Show file tree
Hide file tree
Changes from 18 commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
f144689
Add libcudf strings to/from fixed-point convert APIs
davidwendt Feb 10, 2021
af1c082
Merge branch 'branch-0.19' into strings-to-fixed-point
davidwendt Feb 10, 2021
66ab111
use make_counting_transform_iterator utility
davidwendt Feb 10, 2021
224f07b
fix doxygen example for is_fixed_point
davidwendt Feb 10, 2021
b11a5a9
add new header file to meta.yaml
davidwendt Feb 10, 2021
bf68768
add examples to doxygen comments
davidwendt Feb 10, 2021
b2b232e
include fraction-only test value in FromFixedPoint test
davidwendt Feb 10, 2021
0091585
fix doxygen comments
davidwendt Feb 10, 2021
8cde31a
add is_fixed_point code logic
davidwendt Feb 10, 2021
72a1f7f
Merge branch 'branch-0.19' into strings-to-fixed-point
davidwendt Feb 10, 2021
4c1a585
Merge branch 'branch-0.19' into strings-to-fixed-point
davidwendt Feb 11, 2021
f3825e0
add missing @throw doxygen tag
davidwendt Feb 11, 2021
ea70907
add const to variables and member functions
davidwendt Feb 11, 2021
cfece1d
recode to use passed in scale type instead of inferring
davidwendt Feb 12, 2021
f9b8707
fix doxygen for scale input
davidwendt Feb 16, 2021
5a380fc
return size from integer_to_string; make count_digits constexpr
davidwendt Feb 16, 2021
d261be9
fix doxygen parameter name reference
davidwendt Feb 16, 2021
d4d8441
use integer_to_string return to incr output buffer pointer
davidwendt Feb 16, 2021
2797088
update doxygen to claim UB for invalid data format
davidwendt Feb 17, 2021
28b7021
update copyright year
davidwendt Feb 17, 2021
3aa8d77
add const to bool var declaration
davidwendt Feb 17, 2021
b01c347
update invalid data format doxygen statement
davidwendt Feb 17, 2021
4f7ab54
add very-large and very-small test strings
davidwendt Feb 18, 2021
89921f2
fix overflow bug handling very small numbers
davidwendt Feb 18, 2021
c88f427
Merge branch 'branch-0.19' into strings-to-fixed-point
davidwendt Feb 19, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions conda/recipes/libcudf/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,7 @@ test:
- test -f $PREFIX/include/cudf/strings/convert/convert_booleans.hpp
- test -f $PREFIX/include/cudf/strings/convert/convert_datetime.hpp
- test -f $PREFIX/include/cudf/strings/convert/convert_durations.hpp
- test -f $PREFIX/include/cudf/strings/convert/convert_fixed_point.hpp
- test -f $PREFIX/include/cudf/strings/convert/convert_floats.hpp
- test -f $PREFIX/include/cudf/strings/convert/convert_integers.hpp
- test -f $PREFIX/include/cudf/strings/convert/convert_ipv4.hpp
Expand Down
14 changes: 8 additions & 6 deletions cpp/include/cudf/fixed_point/fixed_point.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -537,21 +537,23 @@ class fixed_point {
explicit operator std::string() const
{
if (_scale < 0) {
int const n = std::pow(10, -_scale);
int const f = _value % n;
auto const av = std::abs(_value);
int const n = std::pow(10, -_scale);
int const f = av % n;
auto const num_zeros =
std::max(0, (-_scale - static_cast<int32_t>(std::to_string(f).size())));
auto const zeros = std::string(num_zeros, '0');
return std::to_string(_value / n) + std::string(".") + zeros +
std::to_string(std::abs(_value) % n);
auto const sign = _value < 0 ? std::string("-") : std::string();
return sign + std::to_string(av / n) + std::string(".") + zeros + std::to_string(av % n);
harrism marked this conversation as resolved.
Show resolved Hide resolved
} else {
auto const zeros = std::string(_scale, '0');
return std::to_string(_value) + zeros;
}
}
}; // namespace numeric
};

/** @brief Function for identifying integer overflow when adding
/**
* @brief Function for identifying integer overflow when adding
*
* @tparam Rep Type of integer to check for overflow on
* @tparam T Types of lhs and rhs (ensures they are the same type)
Expand Down
124 changes: 124 additions & 0 deletions cpp/include/cudf/strings/convert/convert_fixed_point.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once

#include <cudf/column/column.hpp>
#include <cudf/strings/strings_column_view.hpp>

namespace cudf {
namespace strings {
/**
* @addtogroup strings_convert
* @{
* @file
*/

/**
* @brief Returns a new fixed-point column parsing decimal values from the
* provided strings column.
*
* Any null entries will result in corresponding null entries in the output column.
davidwendt marked this conversation as resolved.
Show resolved Hide resolved
*
* Only characters [0-9] plus a prefix '-' and '+' and a single decimal point
* are recognized. When any other character is encountered, the parsing ends
* for that string and the current digits are converted into a fixed-point value.
harrism marked this conversation as resolved.
Show resolved Hide resolved
*
* @code{.pseudo}
* Example:
* s = ['123', '-876', '543.2', '-0.12']
* dt = {DECIMAL32, scale=-2}
davidwendt marked this conversation as resolved.
Show resolved Hide resolved
* fp = to_fixed_point(s, dt)
* fp is [12300, -87600, 54320, -12]
* @endcode
*
* Overflow of the resulting value type is not checked.
* The scale in the `output_type` is used for setting the integer component.
*
* @throw cudf::logic_error if `output_type` is not a fixed-point decimal type.
*
* @param input Strings instance for this operation.
* @param output_type Type of fixed-point column to return including the scale value.
* @param mr Device memory resource used to allocate the returned column's device memory.
* @return New column of `output_type`.
*/
std::unique_ptr<column> to_fixed_point(
harrism marked this conversation as resolved.
Show resolved Hide resolved
strings_column_view const& input,
data_type output_type,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @brief Returns a new strings column converting the fixed-point values
* into a strings column.
*
* Any null entries will result in corresponding null entries in the output column.
davidwendt marked this conversation as resolved.
Show resolved Hide resolved
*
* For each value, a string is created in base-10 decimal.
* Negative numbers will include a '-' prefix.
davidwendt marked this conversation as resolved.
Show resolved Hide resolved
* The column's scale value is used to place the decimal point.
* A negative scale value may add padded zeros after the decimal point.
*
* @code{.pseudo}
* Example:
* fp is [110, 222, 3330, -440, -1] with scale = -2
* s = from_fixed_point(fp)
* s is now ['1.10', '2.22', '33.30', '-4.40', '-0.01']
* @endcode
*
* @throw cudf::logic_error if the `input` column is not a fixed-point decimal type.
*
* @param input Fixed-point column to convert.
* @param mr Device memory resource used to allocate the returned column's device memory.
* @return New strings column.
*/
std::unique_ptr<column> from_fixed_point(
harrism marked this conversation as resolved.
Show resolved Hide resolved
column_view const& input,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @brief Returns a boolean column identifying strings in which all
* characters are valid for conversion to fixed-point.
*
* The output row entry will be set to `true` if the corresponding string element
davidwendt marked this conversation as resolved.
Show resolved Hide resolved
* has at least one character in [+-0123456789.]. The optional sign character
* must only be in the first position. The decimal point may only appear once.
* Also, the integer component must fit within the size limits of the
* underlying fixed-point storage type. The value of the integer component
* is based on the scale of the `decimal_type` provided.
*
* @code{.pseudo}
* Example:
* s = ['123', '-456', '', '1.2.3', '+17E30', '12.34', '.789', '-0.005']
harrism marked this conversation as resolved.
Show resolved Hide resolved
* b = is_fixed_point(s)
* b is [true, true, false, false, false, true, true, true]
* @endcode
*
* Any null row results in a null entry for that row in the output column.
*
* @throw cudf::logic_error if the `decimal_type` is not a fixed-point decimal type.
*
* @param input Strings instance for this operation.
* @param decimal_type Fixed-point type (with scale) used only for checking overflow.
* @param mr Device memory resource used to allocate the returned column's device memory.
* @return New column of boolean results for each string.
*/
std::unique_ptr<column> is_fixed_point(
strings_column_view const& input,
data_type decimal_type = data_type{type_id::DECIMAL64},
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/** @} */ // end of doxygen group
} // namespace strings
} // namespace cudf
21 changes: 20 additions & 1 deletion cpp/include/cudf/strings/detail/converters.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020, NVIDIA CORPORATION.
* Copyright (c) 2020-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -132,6 +132,25 @@ std::unique_ptr<column> from_durations(column_view const& durations,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr);

/**
* @copydoc to_fixed_point(strings_column_view const&,data_type,rmm::mr::device_memory_resource*)
*
* Overload in the `detail` namespace that additionally takes a CUDA stream.
* The `strings` parameter here corresponds to `input` in the public declaration.
*
* @param stream CUDA stream used for device memory operations and kernel launches.
*/
std::unique_ptr<column> to_fixed_point(strings_column_view const& strings,
data_type output_type,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr);

/**
* @copydoc from_fixed_point(column_view const&,rmm::mr::device_memory_resource*)
*
* Overload in the `detail` namespace that additionally takes a CUDA stream.
* The `integers` parameter here corresponds to `input` in the public declaration.
*
* @param stream CUDA stream used for device memory operations and kernel launches.
*/
std::unique_ptr<column> from_fixed_point(column_view const& integers,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr);

} // namespace detail
} // namespace strings
} // namespace cudf
Loading