Skip to content

Commit

Permalink
Return error if BOOL8 column-type is used with integers-to-hex (#14208)
Browse files Browse the repository at this point in the history
Removes support to convert BOOL8 column-type to hex using `cudf::strings::integers_to_hex`.
Also fixed other integer to string conversions to remove this unsupported type.
Added gtests to verify an error is thrown for this case.

Closes #14232

Authors:
  - David Wendt (https://github.com/davidwendt)

Approvers:
  - Bradley Dice (https://github.com/bdice)
  - Nghia Truong (https://github.com/ttnghia)
  - Karthikeyan (https://github.com/karthikeyann)
  - MithunR (https://github.com/mythrocks)

URL: #14208
  • Loading branch information
davidwendt authored Oct 13, 2023
1 parent fa4e8ab commit 6e00ad0
Show file tree
Hide file tree
Showing 6 changed files with 89 additions and 45 deletions.
4 changes: 2 additions & 2 deletions cpp/include/cudf/strings/convert/convert_integers.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -199,14 +199,14 @@ std::unique_ptr<column> is_hex(
*
* @code{.pseudo}
* Example:
* input = [123, -1, 0, 27, 342718233] // int32 type input column
* input = [1234, -1, 0, 27, 342718233] // int32 type input column
* s = integers_to_hex(input)
* s is [ '04D2', 'FFFFFFFF', '00', '1B', '146D7719']
* @endcode
*
* The example above shows an `INT32` type column where each integer is 4 bytes.
* Leading zeros are suppressed unless filling out a complete byte as in
* `123 -> '04D2'` instead of `000004D2` or `4D2`.
* `1234 -> '04D2'` instead of `000004D2` or `4D2`.
*
* @throw cudf::logic_error if the input column is not integral type.
*
Expand Down
24 changes: 24 additions & 0 deletions cpp/include/cudf/utilities/traits.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,30 @@ constexpr inline bool is_integral()
*/
bool is_integral(data_type type);

/**
* @brief Indicates whether the type `T` is an integral type but not bool type.
*
* @tparam T The type to verify
* @return true `T` is integral but not bool
* @return false `T` is not integral or is bool
*/
template <typename T>
constexpr inline bool is_integral_not_bool()
{
return cuda::std::is_integral_v<T> and not std::is_same_v<T, bool>;
}

/**
* @brief Indicates whether `type` is a integral `data_type` and not BOOL8
*
* "Integral" types are fundamental integer types such as `INT*` and `UINT*`.
*
* @param type The `data_type` to verify
* @return true `type` is integral but not bool
* @return false `type` is integral or is bool
*/
bool is_integral_not_bool(data_type type);

/**
* @brief Indicates whether the type `T` is a floating point type.
*
Expand Down
27 changes: 11 additions & 16 deletions cpp/src/strings/convert/convert_hex.cu
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,8 @@ struct hex_to_integer_fn {
* The output_column is expected to be one of the integer types only.
*/
struct dispatch_hex_to_integers_fn {
template <typename IntegerType, std::enable_if_t<std::is_integral_v<IntegerType>>* = nullptr>
template <typename IntegerType,
std::enable_if_t<cudf::is_integral_not_bool<IntegerType>()>* = nullptr>
void operator()(column_device_view const& strings_column,
mutable_column_view& output_column,
rmm::cuda_stream_view stream) const
Expand All @@ -105,22 +106,14 @@ struct dispatch_hex_to_integers_fn {
d_results,
hex_to_integer_fn<IntegerType>{strings_column});
}
// non-integral types throw an exception
// non-integer types throw an exception
template <typename T, typename... Args>
std::enable_if_t<not std::is_integral_v<T>, void> operator()(Args&&...) const
std::enable_if_t<not cudf::is_integral_not_bool<T>(), void> operator()(Args&&...) const
{
CUDF_FAIL("Output for hex_to_integers must be an integral type.");
CUDF_FAIL("Output for hex_to_integers must be an integer type.");
}
};

template <>
void dispatch_hex_to_integers_fn::operator()<bool>(column_device_view const&,
mutable_column_view&,
rmm::cuda_stream_view) const
{
CUDF_FAIL("Output for hex_to_integers must not be a boolean type.");
}

/**
* @brief Functor to convert integers to hexadecimal strings
*
Expand Down Expand Up @@ -179,7 +172,8 @@ struct integer_to_hex_fn {
};

struct dispatch_integers_to_hex_fn {
template <typename IntegerType, std::enable_if_t<std::is_integral_v<IntegerType>>* = nullptr>
template <typename IntegerType,
std::enable_if_t<cudf::is_integral_not_bool<IntegerType>()>* = nullptr>
std::unique_ptr<column> operator()(column_view const& input,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr) const
Expand All @@ -195,11 +189,12 @@ struct dispatch_integers_to_hex_fn {
input.null_count(),
cudf::detail::copy_bitmask(input, stream, mr));
}
// non-integral types throw an exception
// non-integer types throw an exception
template <typename T, typename... Args>
std::enable_if_t<not std::is_integral_v<T>, std::unique_ptr<column>> operator()(Args...) const
std::enable_if_t<not cudf::is_integral_not_bool<T>(), std::unique_ptr<column>> operator()(
Args...) const
{
CUDF_FAIL("integers_to_hex only supports integral type columns");
CUDF_FAIL("integers_to_hex only supports integer type columns");
}
};

Expand Down
38 changes: 12 additions & 26 deletions cpp/src/strings/convert/convert_integers.cu
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ inline __device__ bool is_integer(string_view const& d_str)
* @brief The dispatch functions for checking if strings are valid integers.
*/
struct dispatch_is_integer_fn {
template <typename T, std::enable_if_t<std::is_integral_v<T>>* = nullptr>
template <typename T, std::enable_if_t<cudf::is_integral_not_bool<T>()>* = nullptr>
std::unique_ptr<column> operator()(strings_column_view const& strings,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr) const
Expand Down Expand Up @@ -145,7 +145,7 @@ struct dispatch_is_integer_fn {
return results;
}

template <typename T, std::enable_if_t<not std::is_integral_v<T>>* = nullptr>
template <typename T, std::enable_if_t<not cudf::is_integral_not_bool<T>()>* = nullptr>
std::unique_ptr<column> operator()(strings_column_view const&,
rmm::cuda_stream_view,
rmm::mr::device_memory_resource*) const
Expand Down Expand Up @@ -243,7 +243,8 @@ struct string_to_integer_fn {
* The output_column is expected to be one of the integer types only.
*/
struct dispatch_to_integers_fn {
template <typename IntegerType, std::enable_if_t<std::is_integral_v<IntegerType>>* = nullptr>
template <typename IntegerType,
std::enable_if_t<cudf::is_integral_not_bool<IntegerType>()>* = nullptr>
void operator()(column_device_view const& strings_column,
mutable_column_view& output_column,
rmm::cuda_stream_view stream) const
Expand All @@ -254,22 +255,14 @@ struct dispatch_to_integers_fn {
output_column.data<IntegerType>(),
string_to_integer_fn<IntegerType>{strings_column});
}
// non-integral types throw an exception
template <typename T, std::enable_if_t<not std::is_integral_v<T>>* = nullptr>
// non-integer types throw an exception
template <typename T, std::enable_if_t<not cudf::is_integral_not_bool<T>()>* = nullptr>
void operator()(column_device_view const&, mutable_column_view&, rmm::cuda_stream_view) const
{
CUDF_FAIL("Output for to_integers must be an integral type.");
CUDF_FAIL("Output for to_integers must be an integer type.");
}
};

template <>
void dispatch_to_integers_fn::operator()<bool>(column_device_view const&,
mutable_column_view&,
rmm::cuda_stream_view) const
{
CUDF_FAIL("Output for to_integers must not be a boolean type.");
}

} // namespace

// This will convert a strings column into any integer column type.
Expand Down Expand Up @@ -351,7 +344,8 @@ struct from_integers_fn {
* The template function declaration ensures only integer types are used.
*/
struct dispatch_from_integers_fn {
template <typename IntegerType, std::enable_if_t<std::is_integral_v<IntegerType>>* = nullptr>
template <typename IntegerType,
std::enable_if_t<cudf::is_integral_not_bool<IntegerType>()>* = nullptr>
std::unique_ptr<column> operator()(column_view const& integers,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr) const
Expand All @@ -373,23 +367,15 @@ struct dispatch_from_integers_fn {
std::move(null_mask));
}

// non-integral types throw an exception
template <typename T, std::enable_if_t<not std::is_integral_v<T>>* = nullptr>
// non-integer types throw an exception
template <typename T, std::enable_if_t<not cudf::is_integral_not_bool<T>()>* = nullptr>
std::unique_ptr<column> operator()(column_view const&,
rmm::cuda_stream_view,
rmm::mr::device_memory_resource*) const
{
CUDF_FAIL("Values for from_integers function must be an integral type.");
CUDF_FAIL("Values for from_integers function must be an integer type.");
}
};

template <>
std::unique_ptr<column> dispatch_from_integers_fn::operator()<bool>(
column_view const&, rmm::cuda_stream_view, rmm::mr::device_memory_resource*) const
{
CUDF_FAIL("Input for from_integers must not be a boolean type.");
}

} // namespace

// This will convert all integer column types into a strings column.
Expand Down
15 changes: 14 additions & 1 deletion cpp/src/utilities/traits.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2022, NVIDIA CORPORATION.
* Copyright (c) 2019-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -158,6 +158,19 @@ struct is_integral_impl {

bool is_integral(data_type type) { return cudf::type_dispatcher(type, is_integral_impl{}); }

struct is_integral_not_bool_impl {
template <typename T>
constexpr bool operator()()
{
return is_integral_not_bool<T>();
}
};

bool is_integral_not_bool(data_type type)
{
return cudf::type_dispatcher(type, is_integral_not_bool_impl{});
}

struct is_floating_point_impl {
template <typename T>
constexpr bool operator()()
Expand Down
26 changes: 26 additions & 0 deletions cpp/tests/strings/integers_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -456,3 +456,29 @@ TEST_F(StringsConvertTest, IntegerToHexWithNull)
auto results = cudf::strings::integers_to_hex(integers);
CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
}

TEST_F(StringsConvertTest, IntegerConvertErrors)
{
cudf::test::fixed_width_column_wrapper<bool> bools(
{true, true, false, false, true, true, false, true});
cudf::test::fixed_width_column_wrapper<double> floats(
{123456.0, -1.0, 0.0, 0.0, 12.0, 12345.0, 123456789.0});
EXPECT_THROW(cudf::strings::integers_to_hex(bools), cudf::logic_error);
EXPECT_THROW(cudf::strings::integers_to_hex(floats), cudf::logic_error);
EXPECT_THROW(cudf::strings::from_integers(bools), cudf::logic_error);
EXPECT_THROW(cudf::strings::from_integers(floats), cudf::logic_error);

auto input = cudf::test::strings_column_wrapper({"123456", "-1", "0"});
auto view = cudf::strings_column_view(input);
EXPECT_THROW(cudf::strings::to_integers(view, cudf::data_type(cudf::type_id::BOOL8)),
cudf::logic_error);
EXPECT_THROW(cudf::strings::to_integers(view, cudf::data_type(cudf::type_id::FLOAT32)),
cudf::logic_error);
EXPECT_THROW(cudf::strings::to_integers(view, cudf::data_type(cudf::type_id::TIMESTAMP_SECONDS)),
cudf::logic_error);
EXPECT_THROW(
cudf::strings::to_integers(view, cudf::data_type(cudf::type_id::DURATION_MILLISECONDS)),
cudf::logic_error);
EXPECT_THROW(cudf::strings::to_integers(view, cudf::data_type(cudf::type_id::DECIMAL32)),
cudf::logic_error);
}

0 comments on commit 6e00ad0

Please sign in to comment.