Skip to content

Commit

Permalink
Merge branch 'branch-0.19' into benchmark-strings-concat
Browse files Browse the repository at this point in the history
  • Loading branch information
davidwendt committed Mar 18, 2021
2 parents 5208be1 + 99001d2 commit d8cf0b3
Show file tree
Hide file tree
Showing 24 changed files with 507 additions and 375 deletions.
78 changes: 1 addition & 77 deletions cpp/include/cudf/strings/char_types/char_types.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION.
* Copyright (c) 2019-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -146,82 +146,6 @@ std::unique_ptr<column> filter_characters_of_type(
string_character_types types_to_keep = string_character_types::ALL_TYPES,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @brief Returns a boolean column identifying strings in which all
* characters are valid for conversion to integers.
*
* The output row entry will be set to `true` if the corresponding string element
* is not empty and all of its characters are in [-+0-9].
*
* NOTE(review): the exact acceptance rules (for example, where a sign character
* may appear) are decided by the implementation, which is not visible from this
* header -- confirm there before relying on edge cases.
*
* @code{.pseudo}
* Example:
* s = ['123', '-456', '', 'A', '+7']
* b = is_integer(s)
* b is [true, true, false, false, true]
* @endcode
*
* Any null row results in a null entry for that row in the output column.
*
* @param strings Strings instance for this operation.
* @param mr Device memory resource used to allocate the returned column's device memory.
* @return New column of boolean results for each string.
*/
std::unique_ptr<column> is_integer(
strings_column_view const& strings,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @brief Returns `true` if all strings contain only
* characters that are valid for conversion to integers.
*
* This function will return `true` only if every string element
* is not empty and all of its characters are in [-+0-9].
*
* Any null entry or empty string will cause this function to return `false`.
*
* NOTE(review): the exact acceptance rules (for example, where a sign character
* may appear) are decided by the implementation, which is not visible from this
* header -- confirm there before relying on edge cases.
*
* @param strings Strings instance for this operation.
* @return `true` if all strings are valid, `false` otherwise.
*/
bool all_integer(strings_column_view const& strings);

/**
* @brief Returns a boolean column identifying strings in which all
* characters are valid for conversion to floats.
*
* The output row entry will be set to `true` if the corresponding string element
* is not empty and all of its characters are in [-+0-9eE.].
*
* NOTE(review): the exact acceptance rules (for example, where sign, exponent
* and decimal-point characters may appear) are decided by the implementation,
* which is not visible from this header -- confirm there before relying on
* edge cases.
*
* @code{.pseudo}
* Example:
* s = ['123', '-456', '', 'A', '+7', '8.9', '3.7e+5']
* b = is_float(s)
* b is [true, true, false, false, true, true, true]
* @endcode
*
* Any null row results in a null entry for that row in the output column.
*
* @param strings Strings instance for this operation.
* @param mr Device memory resource used to allocate the returned column's device memory.
* @return New column of boolean results for each string.
*/
std::unique_ptr<column> is_float(
strings_column_view const& strings,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @brief Returns `true` if all strings contain only
* characters that are valid for conversion to floats.
*
* This function will return `true` only if every string element
* is not empty and all of its characters are in [-+0-9eE.].
*
* Any null entry or empty string will cause this function to return `false`.
*
* NOTE(review): the exact acceptance rules (for example, where sign, exponent
* and decimal-point characters may appear) are decided by the implementation,
* which is not visible from this header -- confirm there before relying on
* edge cases.
*
* @param strings Strings instance for this operation.
* @return `true` if all strings are valid, `false` otherwise.
*/
bool all_float(strings_column_view const& strings);

/** @} */ // end of doxygen group
} // namespace strings
} // namespace cudf
26 changes: 25 additions & 1 deletion cpp/include/cudf/strings/convert/convert_floats.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019, NVIDIA CORPORATION.
* Copyright (c) 2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -68,6 +68,30 @@ std::unique_ptr<column> from_floats(
column_view const& floats,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @brief Returns a boolean column identifying strings in which all
* characters are valid for conversion to floats.
*
* The output row entry will be set to `true` if the corresponding string element
* is not empty and all of its characters are in [-+0-9eE.].
*
* NOTE(review): the exact acceptance rules (for example, where sign, exponent
* and decimal-point characters may appear) are decided by the implementation,
* which is not visible from this header -- confirm there before relying on
* edge cases.
*
* @code{.pseudo}
* Example:
* s = ['123', '-456', '', 'A', '+7', '8.9', '3.7e+5']
* b = is_float(s)
* b is [true, true, false, false, true, true, true]
* @endcode
*
* Any null row results in a null entry for that row in the output column.
*
* @param strings Strings instance for this operation.
* @param mr Device memory resource used to allocate the returned column's device memory.
* @return New column of boolean results for each string.
*/
std::unique_ptr<column> is_float(
strings_column_view const& strings,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/** @} */ // end of doxygen group
} // namespace strings
} // namespace cudf
26 changes: 25 additions & 1 deletion cpp/include/cudf/strings/convert/convert_integers.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019, NVIDIA CORPORATION.
* Copyright (c) 2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -73,6 +73,30 @@ std::unique_ptr<column> from_integers(
column_view const& integers,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @brief Returns a boolean column identifying strings in which all
* characters are valid for conversion to integers.
*
* The output row entry will be set to `true` if the corresponding string element
* is not empty and all of its characters are in [-+0-9].
*
* NOTE(review): the exact acceptance rules (for example, where a sign character
* may appear) are decided by the implementation, which is not visible from this
* header -- confirm there before relying on edge cases.
*
* @code{.pseudo}
* Example:
* s = ['123', '-456', '', 'A', '+7']
* b = is_integer(s)
* b is [true, true, false, false, true]
* @endcode
*
* Any null row results in a null entry for that row in the output column.
*
* @param strings Strings instance for this operation.
* @param mr Device memory resource used to allocate the returned column's device memory.
* @return New column of boolean results for each string.
*/
std::unique_ptr<column> is_integer(
strings_column_view const& strings,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @brief Returns a new integer numeric column parsing hexadecimal values from the
* provided strings column.
Expand Down
113 changes: 1 addition & 112 deletions cpp/src/strings/char_types/char_types.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION.
* Copyright (c) 2019-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -186,91 +186,6 @@ std::unique_ptr<column> filter_characters_of_type(strings_column_view const& str
mr);
}

/**
 * @brief Builds a BOOL8 column holding, for each input row, the result of
 * `string::is_integer` on that row's string.
 *
 * The output column is created with a copy of the input's bitmask and the
 * input's null count. Null rows are never handed to `string::is_integer`;
 * `false` is written to their data slot instead.
 *
 * @param strings Strings column to check.
 * @param stream CUDA stream passed to the device-view creation, the bitmask
 *        copy, the column factory and the transform.
 * @param mr Memory resource passed to the bitmask copy and the column factory.
 * @return New BOOL8 column with `strings.size()` rows.
 */
std::unique_ptr<column> is_integer(
  strings_column_view const& strings,
  rmm::cuda_stream_view stream,
  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
{
  // Device-usable view of the parent column; the row check captures a copy of it.
  auto d_strings_owner = column_device_view::create(strings.parent(), stream);
  auto d_strings       = *d_strings_owner;

  // Output column carries a copy of the input bitmask and the input null count.
  auto output = make_numeric_column(data_type{type_id::BOOL8},
                                    strings.size(),
                                    cudf::detail::copy_bitmask(strings.parent(), stream, mr),
                                    strings.null_count(),
                                    stream,
                                    mr);

  // Null rows short-circuit to false, so their string element is never read.
  auto const row_is_integer = [d_strings] __device__(size_type row) {
    return !d_strings.is_null(row) && string::is_integer(d_strings.element<string_view>(row));
  };

  auto const first_row = thrust::make_counting_iterator<size_type>(0);
  thrust::transform(rmm::exec_policy(stream),
                    first_row,
                    first_row + strings.size(),
                    output->mutable_view().data<bool>(),
                    row_is_integer);

  // NOTE(review): presumably needed because mutable_view() above drops the
  // cached null count -- confirm against cudf::column before removing.
  output->set_null_count(strings.null_count());
  return output;
}

/**
 * @brief Reports whether every row of the column is non-null and passes
 * `string::is_integer`.
 *
 * Rows are evaluated lazily through a transform iterator; no output column
 * is allocated.
 *
 * NOTE(review): for a column with zero rows the checked range is empty, so
 * `thrust::all_of` presumably yields `true` -- confirm if callers care.
 *
 * @param strings Strings column to check.
 * @param stream CUDA stream passed to the device-view creation and the reduction.
 * @return `true` if the per-row check holds for all rows, `false` otherwise.
 */
bool all_integer(strings_column_view const& strings, rmm::cuda_stream_view stream)
{
  auto d_strings_owner = column_device_view::create(strings.parent(), stream);
  auto d_strings       = *d_strings_owner;

  // Null rows count as "not an integer"; their string element is never read.
  auto const row_is_integer = [d_strings] __device__(size_type row) {
    return !d_strings.is_null(row) && string::is_integer(d_strings.element<string_view>(row));
  };

  auto const checks_begin =
    thrust::make_transform_iterator(thrust::make_counting_iterator<size_type>(0), row_is_integer);
  auto const checks_end = checks_begin + strings.size();

  return thrust::all_of(
    rmm::exec_policy(stream), checks_begin, checks_end, thrust::identity<bool>());
}

/**
 * @brief Builds a BOOL8 column holding, for each input row, the result of
 * `string::is_float` on that row's string.
 *
 * The output column is created with a copy of the input's bitmask and the
 * input's null count. Null rows are never handed to `string::is_float`;
 * `false` is written to their data slot instead.
 *
 * @param strings Strings column to check.
 * @param stream CUDA stream passed to the device-view creation, the bitmask
 *        copy, the column factory and the transform.
 * @param mr Memory resource passed to the bitmask copy and the column factory.
 * @return New BOOL8 column with `strings.size()` rows.
 */
std::unique_ptr<column> is_float(
  strings_column_view const& strings,
  rmm::cuda_stream_view stream,
  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
{
  // Device-usable view of the parent column; the row check captures a copy of it.
  auto d_strings_owner = column_device_view::create(strings.parent(), stream);
  auto d_strings       = *d_strings_owner;

  // Output column carries a copy of the input bitmask and the input null count.
  auto output = make_numeric_column(data_type{type_id::BOOL8},
                                    strings.size(),
                                    cudf::detail::copy_bitmask(strings.parent(), stream, mr),
                                    strings.null_count(),
                                    stream,
                                    mr);

  // Null rows short-circuit to false, so their string element is never read.
  auto const row_is_float = [d_strings] __device__(size_type row) {
    return !d_strings.is_null(row) && string::is_float(d_strings.element<string_view>(row));
  };

  auto const first_row = thrust::make_counting_iterator<size_type>(0);
  thrust::transform(rmm::exec_policy(stream),
                    first_row,
                    first_row + strings.size(),
                    output->mutable_view().data<bool>(),
                    row_is_float);

  // NOTE(review): presumably needed because mutable_view() above drops the
  // cached null count -- confirm against cudf::column before removing.
  output->set_null_count(strings.null_count());
  return output;
}

/**
 * @brief Reports whether every row of the column is non-null and passes
 * `string::is_float`.
 *
 * Rows are evaluated lazily through a transform iterator; no output column
 * is allocated.
 *
 * NOTE(review): for a column with zero rows the checked range is empty, so
 * `thrust::all_of` presumably yields `true` -- confirm if callers care.
 *
 * @param strings Strings column to check.
 * @param stream CUDA stream passed to the device-view creation and the reduction.
 * @return `true` if the per-row check holds for all rows, `false` otherwise.
 */
bool all_float(strings_column_view const& strings, rmm::cuda_stream_view stream)
{
  auto d_strings_owner = column_device_view::create(strings.parent(), stream);
  auto d_strings       = *d_strings_owner;

  // Null rows count as "not a float"; their string element is never read.
  auto const row_is_float = [d_strings] __device__(size_type row) {
    return !d_strings.is_null(row) && string::is_float(d_strings.element<string_view>(row));
  };

  auto const checks_begin =
    thrust::make_transform_iterator(thrust::make_counting_iterator<size_type>(0), row_is_float);
  auto const checks_end = checks_begin + strings.size();

  return thrust::all_of(
    rmm::exec_policy(stream), checks_begin, checks_end, thrust::identity<bool>());
}

} // namespace detail

// external API
Expand All @@ -295,31 +210,5 @@ std::unique_ptr<column> filter_characters_of_type(strings_column_view const& str
strings, types_to_remove, replacement, types_to_keep, rmm::cuda_stream_default, mr);
}

// Public overload of is_integer: invokes CUDF_FUNC_RANGE() and forwards to
// detail::is_integer on rmm::cuda_stream_default, passing the caller's
// memory resource through unchanged.
std::unique_ptr<column> is_integer(strings_column_view const& strings,
rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
return detail::is_integer(strings, rmm::cuda_stream_default, mr);
}

// Public overload of is_float: invokes CUDF_FUNC_RANGE() and forwards to
// detail::is_float on rmm::cuda_stream_default, passing the caller's
// memory resource through unchanged.
std::unique_ptr<column> is_float(strings_column_view const& strings,
rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
return detail::is_float(strings, rmm::cuda_stream_default, mr);
}

// Public overload of all_integer: invokes CUDF_FUNC_RANGE() and forwards to
// detail::all_integer on rmm::cuda_stream_default.
bool all_integer(strings_column_view const& strings)
{
CUDF_FUNC_RANGE();
return detail::all_integer(strings, rmm::cuda_stream_default);
}

// Public overload of all_float: invokes CUDF_FUNC_RANGE() and forwards to
// detail::all_float on rmm::cuda_stream_default.
bool all_float(strings_column_view const& strings)
{
CUDF_FUNC_RANGE();
return detail::all_float(strings, rmm::cuda_stream_default);
}

} // namespace strings
} // namespace cudf
41 changes: 40 additions & 1 deletion cpp/src/strings/convert/convert_floats.cu
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include <cudf/strings/convert/convert_floats.hpp>
#include <cudf/strings/detail/converters.hpp>
#include <cudf/strings/detail/utilities.hpp>
#include <cudf/strings/string.cuh>
#include <cudf/strings/string_view.cuh>
#include <cudf/strings/strings_column_view.hpp>
#include <cudf/utilities/traits.hpp>
Expand Down Expand Up @@ -536,12 +537,50 @@ std::unique_ptr<column> from_floats(column_view const& floats,
} // namespace detail

// external API

// Public overload of from_floats: invokes CUDF_FUNC_RANGE() and forwards to
// detail::from_floats on rmm::cuda_stream_default, passing the caller's
// memory resource through unchanged.
std::unique_ptr<column> from_floats(column_view const& floats, rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
return detail::from_floats(floats, rmm::cuda_stream_default, mr);
}

namespace detail {
/**
 * @brief Builds a BOOL8 column holding, for each input row, the result of
 * `string::is_float` on that row's string.
 *
 * The output column is created with a copy of the input's bitmask and the
 * input's null count. Null rows are never handed to `string::is_float`;
 * `false` is written to their data slot instead.
 *
 * @param strings Strings column to check.
 * @param stream CUDA stream passed to the device-view creation, the bitmask
 *        copy, the column factory and the transform.
 * @param mr Memory resource passed to the bitmask copy and the column factory.
 * @return New BOOL8 column with `strings.size()` rows.
 */
std::unique_ptr<column> is_float(
  strings_column_view const& strings,
  rmm::cuda_stream_view stream,
  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
{
  // Device-usable view of the parent column; the row check captures a copy of it.
  auto d_strings_owner = column_device_view::create(strings.parent(), stream);
  auto d_strings       = *d_strings_owner;

  // Output column carries a copy of the input bitmask and the input null count.
  auto output = make_numeric_column(data_type{type_id::BOOL8},
                                    strings.size(),
                                    cudf::detail::copy_bitmask(strings.parent(), stream, mr),
                                    strings.null_count(),
                                    stream,
                                    mr);

  // Null rows short-circuit to false, so their string element is never read.
  auto const row_is_float = [d_strings] __device__(size_type row) {
    return !d_strings.is_null(row) && string::is_float(d_strings.element<string_view>(row));
  };

  auto const first_row = thrust::make_counting_iterator<size_type>(0);
  thrust::transform(rmm::exec_policy(stream),
                    first_row,
                    first_row + strings.size(),
                    output->mutable_view().data<bool>(),
                    row_is_float);

  // NOTE(review): presumably needed because mutable_view() above drops the
  // cached null count -- confirm against cudf::column before removing.
  output->set_null_count(strings.null_count());
  return output;
}

}  // namespace detail

// external API
// Public overload of is_float: invokes CUDF_FUNC_RANGE() and forwards to
// detail::is_float on rmm::cuda_stream_default, passing the caller's
// memory resource through unchanged.
std::unique_ptr<column> is_float(strings_column_view const& strings,
rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
return detail::is_float(strings, rmm::cuda_stream_default, mr);
}

} // namespace strings
} // namespace cudf
Loading

0 comments on commit d8cf0b3

Please sign in to comment.