From 941b3af0275033f816e3f8a1a6cdf16d7f76853b Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 15 Mar 2021 09:19:37 -0600 Subject: [PATCH 01/13] Remove `all_integer` and `all_float` functions --- .../cudf/strings/char_types/char_types.hpp | 28 ------------- cpp/src/strings/char_types/char_types.cu | 42 ------------------- cpp/tests/strings/chars_types_tests.cpp | 6 --- 3 files changed, 76 deletions(-) diff --git a/cpp/include/cudf/strings/char_types/char_types.hpp b/cpp/include/cudf/strings/char_types/char_types.hpp index 300722920f4..46a808da967 100644 --- a/cpp/include/cudf/strings/char_types/char_types.hpp +++ b/cpp/include/cudf/strings/char_types/char_types.hpp @@ -170,20 +170,6 @@ std::unique_ptr is_integer( strings_column_view const& strings, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); -/** - * @brief Returns `true` if all strings contain - * characters that are valid for conversion to integers. - * - * This function will return `true` if all string elements - * has at least one character in [-+0-9]. - * - * Any null entry or empty string will cause this function to return `false`. - * - * @param strings Strings instance for this operation. - * @return true if all string are valid - */ -bool all_integer(strings_column_view const& strings); - /** * @brief Returns a boolean column identifying strings in which all * characters are valid for conversion to floats. @@ -208,20 +194,6 @@ std::unique_ptr is_float( strings_column_view const& strings, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); -/** - * @brief Returns `true` if all strings contain - * characters that are valid for conversion to floats. - * - * This function will return `true` if all string elements - * has at least one character in [-+0-9eE.]. - * - * Any null entry or empty string will cause this function to return `false`. - * - * @param strings Strings instance for this operation. 
- * @return true if all string are valid - */ -bool all_float(strings_column_view const& strings); - /** @} */ // end of doxygen group } // namespace strings } // namespace cudf diff --git a/cpp/src/strings/char_types/char_types.cu b/cpp/src/strings/char_types/char_types.cu index 10496b89328..d4230dd72ce 100644 --- a/cpp/src/strings/char_types/char_types.cu +++ b/cpp/src/strings/char_types/char_types.cu @@ -213,21 +213,6 @@ std::unique_ptr is_integer( return results; } -bool all_integer(strings_column_view const& strings, rmm::cuda_stream_view stream) -{ - auto strings_column = column_device_view::create(strings.parent(), stream); - auto d_column = *strings_column; - auto transformer_itr = thrust::make_transform_iterator( - thrust::make_counting_iterator(0), [d_column] __device__(size_type idx) { - if (d_column.is_null(idx)) return false; - return string::is_integer(d_column.element(idx)); - }); - return thrust::all_of(rmm::exec_policy(stream), - transformer_itr, - transformer_itr + strings.size(), - thrust::identity()); -} - std::unique_ptr is_float( strings_column_view const& strings, rmm::cuda_stream_view stream, @@ -256,21 +241,6 @@ std::unique_ptr is_float( return results; } -bool all_float(strings_column_view const& strings, rmm::cuda_stream_view stream) -{ - auto strings_column = column_device_view::create(strings.parent(), stream); - auto d_column = *strings_column; - auto transformer_itr = thrust::make_transform_iterator( - thrust::make_counting_iterator(0), [d_column] __device__(size_type idx) { - if (d_column.is_null(idx)) return false; - return string::is_float(d_column.element(idx)); - }); - return thrust::all_of(rmm::exec_policy(stream), - transformer_itr, - transformer_itr + strings.size(), - thrust::identity()); -} - } // namespace detail // external API @@ -309,17 +279,5 @@ std::unique_ptr is_float(strings_column_view const& strings, return detail::is_float(strings, rmm::cuda_stream_default, mr); } -bool all_integer(strings_column_view const& 
strings) -{ - CUDF_FUNC_RANGE(); - return detail::all_integer(strings, rmm::cuda_stream_default); -} - -bool all_float(strings_column_view const& strings) -{ - CUDF_FUNC_RANGE(); - return detail::all_float(strings, rmm::cuda_stream_default); -} - } // namespace strings } // namespace cudf diff --git a/cpp/tests/strings/chars_types_tests.cpp b/cpp/tests/strings/chars_types_tests.cpp index 803a9b01b07..8a25dc04e6d 100644 --- a/cpp/tests/strings/chars_types_tests.cpp +++ b/cpp/tests/strings/chars_types_tests.cpp @@ -235,14 +235,12 @@ TEST_F(StringsCharsTest, Integers) auto results = cudf::strings::is_integer(cudf::strings_column_view(strings1)); cudf::test::fixed_width_column_wrapper expected1({1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0}); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected1); - EXPECT_FALSE(cudf::strings::all_integer(cudf::strings_column_view(strings1))); cudf::test::strings_column_wrapper strings2( {"0", "+0", "-0", "1234567890", "-27341132", "+012", "023", "-045"}); results = cudf::strings::is_integer(cudf::strings_column_view(strings2)); cudf::test::fixed_width_column_wrapper expected2({1, 1, 1, 1, 1, 1, 1, 1}); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected2); - EXPECT_TRUE(cudf::strings::all_integer(cudf::strings_column_view(strings2))); } TEST_F(StringsCharsTest, Floats) @@ -266,14 +264,12 @@ TEST_F(StringsCharsTest, Floats) cudf::test::fixed_width_column_wrapper expected1( {1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0}); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected1); - EXPECT_FALSE(cudf::strings::all_float(cudf::strings_column_view(strings1))); cudf::test::strings_column_wrapper strings2( {"+175", "-34", "9.8", "1234567890", "6.7e17", "-917.2e5"}); results = cudf::strings::is_float(cudf::strings_column_view(strings2)); cudf::test::fixed_width_column_wrapper expected2({1, 1, 1, 1, 1, 1}); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected2); - EXPECT_TRUE(cudf::strings::all_float(cudf::strings_column_view(strings2))); } TEST_F(StringsCharsTest, 
EmptyStrings) @@ -286,10 +282,8 @@ TEST_F(StringsCharsTest, EmptyStrings) CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected); results = cudf::strings::is_integer(strings_view); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected); - EXPECT_FALSE(cudf::strings::all_integer(strings_view)); results = cudf::strings::is_float(strings_view); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected); - EXPECT_FALSE(cudf::strings::all_float(strings_view)); } TEST_F(StringsCharsTest, FilterCharTypes) From e248ca88dcff799295892a759171f8d4a8cc11a8 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 15 Mar 2021 09:47:29 -0600 Subject: [PATCH 02/13] Move `is_integer` and `is_float` from `strings/chars_types.*` to `strings/convert/convert_*` --- .../cudf/strings/char_types/char_types.hpp | 48 ------------- .../cudf/strings/convert/convert_floats.hpp | 24 +++++++ .../cudf/strings/convert/convert_integers.hpp | 24 +++++++ cpp/src/strings/char_types/char_types.cu | 69 ------------------- cpp/src/strings/convert/convert_floats.cu | 41 ++++++++++- cpp/src/strings/convert/convert_integers.cu | 39 ++++++++++- cpp/tests/strings/chars_types_tests.cpp | 3 +- 7 files changed, 128 insertions(+), 120 deletions(-) diff --git a/cpp/include/cudf/strings/char_types/char_types.hpp b/cpp/include/cudf/strings/char_types/char_types.hpp index 46a808da967..07b3fba291c 100644 --- a/cpp/include/cudf/strings/char_types/char_types.hpp +++ b/cpp/include/cudf/strings/char_types/char_types.hpp @@ -146,54 +146,6 @@ std::unique_ptr filter_characters_of_type( string_character_types types_to_keep = string_character_types::ALL_TYPES, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); -/** - * @brief Returns a boolean column identifying strings in which all - * characters are valid for conversion to integers. - * - * The output row entry will be set to `true` if the corresponding string element - * has at least one character in [-+0-9]. 
- * - * @code{.pseudo} - * Example: - * s = ['123', '-456', '', 'A', '+7'] - * b = s.is_integer(s) - * b is [true, true, false, false, true] - * @endcode - * - * Any null row results in a null entry for that row in the output column. - * - * @param strings Strings instance for this operation. - * @param mr Device memory resource used to allocate the returned column's device memory. - * @return New column of boolean results for each string. - */ -std::unique_ptr is_integer( - strings_column_view const& strings, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); - -/** - * @brief Returns a boolean column identifying strings in which all - * characters are valid for conversion to floats. - * - * The output row entry will be set to `true` if the corresponding string element - * has at least one character in [-+0-9eE.]. - * - * @code{.pseudo} - * Example: - * s = ['123', '-456', '', 'A', '+7', '8.9' '3.7e+5'] - * b = s.is_float(s) - * b is [true, true, false, false, true, true, true] - * @endcode - * - * Any null row results in a null entry for that row in the output column. - * - * @param strings Strings instance for this operation. - * @param mr Device memory resource used to allocate the returned column's device memory. - * @return New column of boolean results for each string. 
- */ -std::unique_ptr is_float( - strings_column_view const& strings, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); - /** @} */ // end of doxygen group } // namespace strings } // namespace cudf diff --git a/cpp/include/cudf/strings/convert/convert_floats.hpp b/cpp/include/cudf/strings/convert/convert_floats.hpp index cb4746dbf40..951c2615999 100644 --- a/cpp/include/cudf/strings/convert/convert_floats.hpp +++ b/cpp/include/cudf/strings/convert/convert_floats.hpp @@ -68,6 +68,30 @@ std::unique_ptr from_floats( column_view const& floats, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +/** + * @brief Returns a boolean column identifying strings in which all + * characters are valid for conversion to floats. + * + * The output row entry will be set to `true` if the corresponding string element + * has at least one character in [-+0-9eE.]. + * + * @code{.pseudo} + * Example: + * s = ['123', '-456', '', 'A', '+7', '8.9', '3.7e+5'] + * b = s.is_float(s) + * b is [true, true, false, false, true, true, true] + * @endcode + * + * Any null row results in a null entry for that row in the output column. + * + * @param strings Strings instance for this operation. + * @param mr Device memory resource used to allocate the returned column's device memory. + * @return New column of boolean results for each string.
+ */ +std::unique_ptr is_float( + strings_column_view const& strings, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + /** @} */ // end of doxygen group } // namespace strings } // namespace cudf diff --git a/cpp/include/cudf/strings/convert/convert_integers.hpp b/cpp/include/cudf/strings/convert/convert_integers.hpp index 8f42deb380d..2d77d51dd08 100644 --- a/cpp/include/cudf/strings/convert/convert_integers.hpp +++ b/cpp/include/cudf/strings/convert/convert_integers.hpp @@ -73,6 +73,30 @@ std::unique_ptr from_integers( column_view const& integers, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +/** + * @brief Returns a boolean column identifying strings in which all + * characters are valid for conversion to integers. + * + * The output row entry will be set to `true` if the corresponding string element + * has at least one character in [-+0-9]. + * + * @code{.pseudo} + * Example: + * s = ['123', '-456', '', 'A', '+7'] + * b = s.is_integer(s) + * b is [true, true, false, false, true] + * @endcode + * + * Any null row results in a null entry for that row in the output column. + * + * @param strings Strings instance for this operation. + * @param mr Device memory resource used to allocate the returned column's device memory. + * @return New column of boolean results for each string. + */ +std::unique_ptr is_integer( + strings_column_view const& strings, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + /** * @brief Returns a new integer numeric column parsing hexadecimal values from the * provided strings column. 
diff --git a/cpp/src/strings/char_types/char_types.cu b/cpp/src/strings/char_types/char_types.cu index d4230dd72ce..db0319d3ef3 100644 --- a/cpp/src/strings/char_types/char_types.cu +++ b/cpp/src/strings/char_types/char_types.cu @@ -186,61 +186,6 @@ std::unique_ptr filter_characters_of_type(strings_column_view const& str mr); } -std::unique_ptr is_integer( - strings_column_view const& strings, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) -{ - auto strings_column = column_device_view::create(strings.parent(), stream); - auto d_column = *strings_column; - // create output column - auto results = make_numeric_column(data_type{type_id::BOOL8}, - strings.size(), - cudf::detail::copy_bitmask(strings.parent(), stream, mr), - strings.null_count(), - stream, - mr); - auto d_results = results->mutable_view().data(); - thrust::transform(rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(strings.size()), - d_results, - [d_column] __device__(size_type idx) { - if (d_column.is_null(idx)) return false; - return string::is_integer(d_column.element(idx)); - }); - results->set_null_count(strings.null_count()); - return results; -} - -std::unique_ptr is_float( - strings_column_view const& strings, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) -{ - auto strings_column = column_device_view::create(strings.parent(), stream); - auto d_column = *strings_column; - // create output column - auto results = make_numeric_column(data_type{type_id::BOOL8}, - strings.size(), - cudf::detail::copy_bitmask(strings.parent(), stream, mr), - strings.null_count(), - stream, - mr); - auto d_results = results->mutable_view().data(); - // check strings for valid float chars - thrust::transform(rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(strings.size()), - d_results, - [d_column] 
__device__(size_type idx) { - if (d_column.is_null(idx)) return false; - return string::is_float(d_column.element(idx)); - }); - results->set_null_count(strings.null_count()); - return results; -} - } // namespace detail // external API @@ -265,19 +210,5 @@ std::unique_ptr filter_characters_of_type(strings_column_view const& str strings, types_to_remove, replacement, types_to_keep, rmm::cuda_stream_default, mr); } -std::unique_ptr is_integer(strings_column_view const& strings, - rmm::mr::device_memory_resource* mr) -{ - CUDF_FUNC_RANGE(); - return detail::is_integer(strings, rmm::cuda_stream_default, mr); -} - -std::unique_ptr is_float(strings_column_view const& strings, - rmm::mr::device_memory_resource* mr) -{ - CUDF_FUNC_RANGE(); - return detail::is_float(strings, rmm::cuda_stream_default, mr); -} - } // namespace strings } // namespace cudf diff --git a/cpp/src/strings/convert/convert_floats.cu b/cpp/src/strings/convert/convert_floats.cu index 2bf65976986..b6d99efd51f 100644 --- a/cpp/src/strings/convert/convert_floats.cu +++ b/cpp/src/strings/convert/convert_floats.cu @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -536,12 +537,50 @@ std::unique_ptr from_floats(column_view const& floats, } // namespace detail // external API - std::unique_ptr from_floats(column_view const& floats, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); return detail::from_floats(floats, rmm::cuda_stream_default, mr); } +namespace detail { +std::unique_ptr is_float( + strings_column_view const& strings, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) +{ + auto strings_column = column_device_view::create(strings.parent(), stream); + auto d_column = *strings_column; + // create output column + auto results = make_numeric_column(data_type{type_id::BOOL8}, + strings.size(), + cudf::detail::copy_bitmask(strings.parent(), stream, mr), + strings.null_count(), + stream, + mr); + auto 
d_results = results->mutable_view().data(); + // check strings for valid float chars + thrust::transform(rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(strings.size()), + d_results, + [d_column] __device__(size_type idx) { + if (d_column.is_null(idx)) return false; + return string::is_float(d_column.element(idx)); + }); + results->set_null_count(strings.null_count()); + return results; +} + +} // namespace detail + +// external API +std::unique_ptr is_float(strings_column_view const& strings, + rmm::mr::device_memory_resource* mr) +{ + CUDF_FUNC_RANGE(); + return detail::is_float(strings, rmm::cuda_stream_default, mr); +} + } // namespace strings } // namespace cudf diff --git a/cpp/src/strings/convert/convert_integers.cu b/cpp/src/strings/convert/convert_integers.cu index 112550fc25b..23b3c4e6763 100644 --- a/cpp/src/strings/convert/convert_integers.cu +++ b/cpp/src/strings/convert/convert_integers.cu @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -245,7 +246,6 @@ std::unique_ptr from_integers(column_view const& integers, } // namespace detail // external API - std::unique_ptr from_integers(column_view const& integers, rmm::mr::device_memory_resource* mr) { @@ -253,5 +253,42 @@ std::unique_ptr from_integers(column_view const& integers, return detail::from_integers(integers, rmm::cuda_stream_default, mr); } +namespace detail { +std::unique_ptr is_integer( + strings_column_view const& strings, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) +{ + auto strings_column = column_device_view::create(strings.parent(), stream); + auto d_column = *strings_column; + // create output column + auto results = make_numeric_column(data_type{type_id::BOOL8}, + strings.size(), + cudf::detail::copy_bitmask(strings.parent(), stream, mr), + strings.null_count(), + stream, + mr); + auto d_results = results->mutable_view().data(); + 
thrust::transform(rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(strings.size()), + d_results, + [d_column] __device__(size_type idx) { + if (d_column.is_null(idx)) return false; + return string::is_integer(d_column.element(idx)); + }); + results->set_null_count(strings.null_count()); + return results; +} +} // namespace detail + +// external API +std::unique_ptr is_integer(strings_column_view const& strings, + rmm::mr::device_memory_resource* mr) +{ + CUDF_FUNC_RANGE(); + return detail::is_integer(strings, rmm::cuda_stream_default, mr); +} + } // namespace strings } // namespace cudf diff --git a/cpp/tests/strings/chars_types_tests.cpp b/cpp/tests/strings/chars_types_tests.cpp index 8a25dc04e6d..7976f5d2a39 100644 --- a/cpp/tests/strings/chars_types_tests.cpp +++ b/cpp/tests/strings/chars_types_tests.cpp @@ -14,9 +14,10 @@ * limitations under the License. */ -#include #include #include +#include +#include #include #include #include From 9ecf929d773465ab0d00a918fd2094ba106fba05 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 15 Mar 2021 10:58:02 -0600 Subject: [PATCH 03/13] Rewrite tests for `chars_types`, `is_integer` and `is_float` --- cpp/tests/strings/chars_types_tests.cpp | 58 ------------------------- cpp/tests/strings/floats_tests.cpp | 35 +++++++++++++++ cpp/tests/strings/integers_tests.cu | 21 +++++++++ 3 files changed, 56 insertions(+), 58 deletions(-) diff --git a/cpp/tests/strings/chars_types_tests.cpp b/cpp/tests/strings/chars_types_tests.cpp index 7976f5d2a39..702329edaba 100644 --- a/cpp/tests/strings/chars_types_tests.cpp +++ b/cpp/tests/strings/chars_types_tests.cpp @@ -16,8 +16,6 @@ #include #include -#include -#include #include #include #include @@ -229,50 +227,6 @@ TEST_F(StringsCharsTest, Numerics) CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected); } -TEST_F(StringsCharsTest, Integers) -{ - cudf::test::strings_column_wrapper strings1( - {"+175", "-34", "9.8", "17+2", "+-14", "1234567890", 
"67de", "", "1e10", "-", "++", ""}); - auto results = cudf::strings::is_integer(cudf::strings_column_view(strings1)); - cudf::test::fixed_width_column_wrapper expected1({1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0}); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected1); - - cudf::test::strings_column_wrapper strings2( - {"0", "+0", "-0", "1234567890", "-27341132", "+012", "023", "-045"}); - results = cudf::strings::is_integer(cudf::strings_column_view(strings2)); - cudf::test::fixed_width_column_wrapper expected2({1, 1, 1, 1, 1, 1, 1, 1}); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected2); -} - -TEST_F(StringsCharsTest, Floats) -{ - cudf::test::strings_column_wrapper strings1({"+175", - "-9.8", - "7+2", - "+-4", - "6.7e17", - "-1.2e-5", - "e", - ".e", - "1.e+-2", - "00.00", - "1.0e+1.0", - "1.2.3", - "+", - "--", - ""}); - auto results = cudf::strings::is_float(cudf::strings_column_view(strings1)); - cudf::test::fixed_width_column_wrapper expected1( - {1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0}); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected1); - - cudf::test::strings_column_wrapper strings2( - {"+175", "-34", "9.8", "1234567890", "6.7e17", "-917.2e5"}); - results = cudf::strings::is_float(cudf::strings_column_view(strings2)); - cudf::test::fixed_width_column_wrapper expected2({1, 1, 1, 1, 1, 1}); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected2); -} - TEST_F(StringsCharsTest, EmptyStrings) { cudf::test::strings_column_wrapper strings({"", "", ""}); @@ -281,10 +235,6 @@ TEST_F(StringsCharsTest, EmptyStrings) auto results = cudf::strings::all_characters_of_type( strings_view, cudf::strings::string_character_types::ALPHANUM); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected); - results = cudf::strings::is_integer(strings_view); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected); - results = cudf::strings::is_float(strings_view); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected); } TEST_F(StringsCharsTest, FilterCharTypes) @@ -374,14 +324,6 @@ 
TEST_F(StringsCharsTest, EmptyStringsColumn) EXPECT_EQ(cudf::type_id::BOOL8, results->view().type().id()); EXPECT_EQ(0, results->view().size()); - results = cudf::strings::is_integer(strings_view); - EXPECT_EQ(cudf::type_id::BOOL8, results->view().type().id()); - EXPECT_EQ(0, results->view().size()); - - results = cudf::strings::is_float(strings_view); - EXPECT_EQ(cudf::type_id::BOOL8, results->view().type().id()); - EXPECT_EQ(0, results->view().size()); - results = cudf::strings::filter_characters_of_type( strings_view, cudf::strings::string_character_types::NUMERIC); EXPECT_EQ(cudf::type_id::STRING, results->view().type().id()); diff --git a/cpp/tests/strings/floats_tests.cpp b/cpp/tests/strings/floats_tests.cpp index b98416d9edd..f7151363d83 100644 --- a/cpp/tests/strings/floats_tests.cpp +++ b/cpp/tests/strings/floats_tests.cpp @@ -27,6 +27,41 @@ struct StringsConvertTest : public cudf::test::BaseFixture { }; +TEST_F(StringsConvertTest, IsFloat) +{ + cudf::test::strings_column_wrapper strings; + auto strings_view = cudf::strings_column_view(strings); + auto results = cudf::strings::is_float(strings_view); + EXPECT_EQ(cudf::type_id::BOOL8, results->view().type().id()); + EXPECT_EQ(0, results->view().size()); + + cudf::test::strings_column_wrapper strings1({"+175", + "-9.8", + "7+2", + "+-4", + "6.7e17", + "-1.2e-5", + "e", + ".e", + "1.e+-2", + "00.00", + "1.0e+1.0", + "1.2.3", + "+", + "--", + ""}); + results = cudf::strings::is_float(cudf::strings_column_view(strings1)); + cudf::test::fixed_width_column_wrapper expected1( + {1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected1); + + cudf::test::strings_column_wrapper strings2( + {"+175", "-34", "9.8", "1234567890", "6.7e17", "-917.2e5"}); + results = cudf::strings::is_float(cudf::strings_column_view(strings2)); + cudf::test::fixed_width_column_wrapper expected2({1, 1, 1, 1, 1, 1}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected2); +} + 
TEST_F(StringsConvertTest, ToFloats32) { std::vector h_strings{"1234", diff --git a/cpp/tests/strings/integers_tests.cu b/cpp/tests/strings/integers_tests.cu index 9e2b9809b26..4d7037051cb 100644 --- a/cpp/tests/strings/integers_tests.cu +++ b/cpp/tests/strings/integers_tests.cu @@ -29,6 +29,27 @@ struct StringsConvertTest : public cudf::test::BaseFixture { }; +TEST_F(StringsConvertTest, IsInteger) +{ + cudf::test::strings_column_wrapper strings; + auto strings_view = cudf::strings_column_view(strings); + auto results = cudf::strings::is_integer(strings_view); + EXPECT_EQ(cudf::type_id::BOOL8, results->view().type().id()); + EXPECT_EQ(0, results->view().size()); + + cudf::test::strings_column_wrapper strings1( + {"+175", "-34", "9.8", "17+2", "+-14", "1234567890", "67de", "", "1e10", "-", "++", ""}); + results = cudf::strings::is_integer(cudf::strings_column_view(strings1)); + cudf::test::fixed_width_column_wrapper expected1({1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected1); + + cudf::test::strings_column_wrapper strings2( + {"0", "+0", "-0", "1234567890", "-27341132", "+012", "023", "-045"}); + results = cudf::strings::is_integer(cudf::strings_column_view(strings2)); + cudf::test::fixed_width_column_wrapper expected2({1, 1, 1, 1, 1, 1, 1, 1}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected2); +} + TEST_F(StringsConvertTest, ToInteger) { std::vector h_strings{ From 0a0e1114b1a590bbe4aba052d699b25c20cf3846 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 15 Mar 2021 11:32:23 -0600 Subject: [PATCH 04/13] Update Python binding for `is_integer` and `is_float` --- java/src/main/native/src/ColumnViewJni.cpp | 1 - .../cudf/cudf/_lib/cpp/strings/char_types.pxd | 8 ----- .../cpp/strings/convert/convert_floats.pxd | 4 +++ .../cpp/strings/convert/convert_integers.pxd | 4 +++ python/cudf/cudf/_lib/strings/char_types.pyx | 34 ------------------- .../_lib/strings/convert/convert_floats.pyx | 30 ++++++++++++++++ 
.../_lib/strings/convert/convert_integers.pyx | 30 ++++++++++++++++ python/cudf/cudf/core/column/string.py | 6 ++-- python/cudf/cudf/core/tools/datetimes.py | 2 +- 9 files changed, 73 insertions(+), 46 deletions(-) create mode 100644 python/cudf/cudf/_lib/strings/convert/convert_floats.pyx create mode 100644 python/cudf/cudf/_lib/strings/convert/convert_integers.pyx diff --git a/java/src/main/native/src/ColumnViewJni.cpp b/java/src/main/native/src/ColumnViewJni.cpp index e8474bda1be..1db45e3c4ba 100644 --- a/java/src/main/native/src/ColumnViewJni.cpp +++ b/java/src/main/native/src/ColumnViewJni.cpp @@ -37,7 +37,6 @@ #include #include #include -#include #include #include #include diff --git a/python/cudf/cudf/_lib/cpp/strings/char_types.pxd b/python/cudf/cudf/_lib/cpp/strings/char_types.pxd index ad675027c10..1ccb736c581 100644 --- a/python/cudf/cudf/_lib/cpp/strings/char_types.pxd +++ b/python/cudf/cudf/_lib/cpp/strings/char_types.pxd @@ -33,11 +33,3 @@ cdef extern from "cudf/strings/char_types/char_types.hpp" \ string_character_types types_to_remove, string_scalar replacement, string_character_types types_to_keep) except + - - cdef unique_ptr[column] is_integer( - column_view source_strings - ) except + - - cdef unique_ptr[column] is_float( - column_view source_strings - ) except + diff --git a/python/cudf/cudf/_lib/cpp/strings/convert/convert_floats.pxd b/python/cudf/cudf/_lib/cpp/strings/convert/convert_floats.pxd index baee01b8f99..83cb6f7647b 100644 --- a/python/cudf/cudf/_lib/cpp/strings/convert/convert_floats.pxd +++ b/python/cudf/cudf/_lib/cpp/strings/convert/convert_floats.pxd @@ -14,3 +14,7 @@ cdef extern from "cudf/strings/convert/convert_floats.hpp" namespace \ cdef unique_ptr[column] from_floats( column_view input_col) except + + + cdef unique_ptr[column] is_float( + column_view source_strings + ) except + diff --git a/python/cudf/cudf/_lib/cpp/strings/convert/convert_integers.pxd b/python/cudf/cudf/_lib/cpp/strings/convert/convert_integers.pxd index 
92f99a2f5cb..db513bb2fa2 100644 --- a/python/cudf/cudf/_lib/cpp/strings/convert/convert_integers.pxd +++ b/python/cudf/cudf/_lib/cpp/strings/convert/convert_integers.pxd @@ -15,6 +15,10 @@ cdef extern from "cudf/strings/convert/convert_integers.hpp" namespace \ cdef unique_ptr[column] from_integers( column_view input_col) except + + cdef unique_ptr[column] is_integer( + column_view source_strings + ) except + + cdef unique_ptr[column] hex_to_integers( column_view input_col, data_type output_type) except + diff --git a/python/cudf/cudf/_lib/strings/char_types.pyx b/python/cudf/cudf/_lib/strings/char_types.pyx index 5d8d1522418..70347daf5fb 100644 --- a/python/cudf/cudf/_lib/strings/char_types.pyx +++ b/python/cudf/cudf/_lib/strings/char_types.pyx @@ -14,8 +14,6 @@ from cudf._lib.cpp.strings.char_types cimport ( all_characters_of_type as cpp_all_characters_of_type, filter_characters_of_type as cpp_filter_characters_of_type, string_character_types as string_character_types, - is_integer as cpp_is_integer, - is_float as cpp_is_float, ) @@ -191,35 +189,3 @@ def is_space(Column source_strings): )) return Column.from_unique_ptr(move(c_result)) - - -def is_integer(Column source_strings): - """ - Returns a Column of boolean values with True for `source_strings` - that have intergers. - """ - cdef unique_ptr[column] c_result - cdef column_view source_view = source_strings.view() - - with nogil: - c_result = move(cpp_is_integer( - source_view - )) - - return Column.from_unique_ptr(move(c_result)) - - -def is_float(Column source_strings): - """ - Returns a Column of boolean values with True for `source_strings` - that have floats. 
- """ - cdef unique_ptr[column] c_result - cdef column_view source_view = source_strings.view() - - with nogil: - c_result = move(cpp_is_float( - source_view - )) - - return Column.from_unique_ptr(move(c_result)) diff --git a/python/cudf/cudf/_lib/strings/convert/convert_floats.pyx b/python/cudf/cudf/_lib/strings/convert/convert_floats.pyx new file mode 100644 index 00000000000..c4f7a9cc8d9 --- /dev/null +++ b/python/cudf/cudf/_lib/strings/convert/convert_floats.pyx @@ -0,0 +1,30 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. + +from libcpp cimport bool +from libcpp.memory cimport unique_ptr +from libcpp.utility cimport move + +from cudf._lib.cpp.column.column_view cimport column_view +from cudf._lib.column cimport Column +from cudf._lib.cpp.column.column cimport column + +from cudf._lib.cpp.strings.convert cimport ( + is_float as cpp_is_float, +) + + +def is_float(Column source_strings): + """ + Returns a Column of boolean values with True for `source_strings` + that have floats. + """ + cdef unique_ptr[column] c_result + cdef column_view source_view = source_strings.view() + + with nogil: + c_result = move(cpp_is_float( + source_view + )) + + return Column.from_unique_ptr(move(c_result)) + diff --git a/python/cudf/cudf/_lib/strings/convert/convert_integers.pyx b/python/cudf/cudf/_lib/strings/convert/convert_integers.pyx new file mode 100644 index 00000000000..7ca5b4d4e0d --- /dev/null +++ b/python/cudf/cudf/_lib/strings/convert/convert_integers.pyx @@ -0,0 +1,30 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. 
+ +from libcpp cimport bool +from libcpp.memory cimport unique_ptr +from libcpp.utility cimport move + +from cudf._lib.cpp.column.column_view cimport column_view +from cudf._lib.column cimport Column +from cudf._lib.cpp.column.column cimport column + +from cudf._lib.cpp.strings.convert cimport ( + is_integer as cpp_is_integer, +) + + +def is_integer(Column source_strings): + """ + Returns a Column of boolean values with True for `source_strings` + that have integers. + """ + cdef unique_ptr[column] c_result + cdef column_view source_view = source_strings.view() + + with nogil: + c_result = move(cpp_is_integer( + source_view + )) + + return Column.from_unique_ptr(move(c_result)) + diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py index 81abdd3f66a..3c9e55420e5 100644 --- a/python/cudf/cudf/core/column/string.py +++ b/python/cudf/cudf/core/column/string.py @@ -70,13 +70,15 @@ is_alpha as cpp_is_alpha, is_decimal as cpp_is_decimal, is_digit as cpp_is_digit, - is_float as cpp_is_float, - is_integer as cpp_is_integer, is_lower as cpp_is_lower, is_numeric as cpp_is_numeric, is_space as cpp_isspace, is_upper as cpp_is_upper, ) +from cudf._lib.strings.convert import ( + is_float as cpp_is_float, + is_integer as cpp_is_integer, +) from cudf._lib.strings.combine import ( concatenate as cpp_concatenate, join as cpp_join, diff --git a/python/cudf/cudf/core/tools/datetimes.py b/python/cudf/cudf/core/tools/datetimes.py index 4e5e4ce1987..c5a4d95b4ef 100644 --- a/python/cudf/cudf/core/tools/datetimes.py +++ b/python/cudf/cudf/core/tools/datetimes.py @@ -8,7 +8,7 @@ from pandas.core.tools.datetimes import _unit_map import cudf -from cudf._lib.strings.char_types import is_integer as cpp_is_integer +from cudf._lib.strings.convert import is_integer as cpp_is_integer from cudf.core import column from cudf.core.index import as_index from cudf.utils.dtypes import is_scalar From f80cdcad06c0d8d9664ceaa165e6da819af43963 Mon Sep 17 00:00:00 2001
From: Nghia Truong Date: Mon, 15 Mar 2021 18:34:47 -0600 Subject: [PATCH 05/13] Update copyright year in headers --- python/cudf/cudf/_lib/cpp/strings/convert/convert_floats.pxd | 2 +- python/cudf/cudf/_lib/cpp/strings/convert/convert_integers.pxd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/python/cudf/cudf/_lib/cpp/strings/convert/convert_floats.pxd b/python/cudf/cudf/_lib/cpp/strings/convert/convert_floats.pxd index 83cb6f7647b..55a84b60efd 100644 --- a/python/cudf/cudf/_lib/cpp/strings/convert/convert_floats.pxd +++ b/python/cudf/cudf/_lib/cpp/strings/convert/convert_floats.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2021, NVIDIA CORPORATION. from cudf._lib.cpp.column.column cimport column from cudf._lib.cpp.column.column_view cimport column_view diff --git a/python/cudf/cudf/_lib/cpp/strings/convert/convert_integers.pxd b/python/cudf/cudf/_lib/cpp/strings/convert/convert_integers.pxd index db513bb2fa2..6e45d4ba869 100644 --- a/python/cudf/cudf/_lib/cpp/strings/convert/convert_integers.pxd +++ b/python/cudf/cudf/_lib/cpp/strings/convert/convert_integers.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2021, NVIDIA CORPORATION. 
from cudf._lib.cpp.column.column cimport column from cudf._lib.cpp.column.column_view cimport column_view From f338c0196724bf748c1873dacd7a45a2a649decd Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 15 Mar 2021 18:35:08 -0600 Subject: [PATCH 06/13] Fix python import path --- python/cudf/cudf/_lib/strings/convert/convert_floats.pyx | 3 +-- python/cudf/cudf/_lib/strings/convert/convert_integers.pyx | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/python/cudf/cudf/_lib/strings/convert/convert_floats.pyx b/python/cudf/cudf/_lib/strings/convert/convert_floats.pyx index c4f7a9cc8d9..195d9b71f6e 100644 --- a/python/cudf/cudf/_lib/strings/convert/convert_floats.pyx +++ b/python/cudf/cudf/_lib/strings/convert/convert_floats.pyx @@ -8,7 +8,7 @@ from cudf._lib.cpp.column.column_view cimport column_view from cudf._lib.column cimport Column from cudf._lib.cpp.column.column cimport column -from cudf._lib.cpp.strings.convert cimport ( +from cudf._lib.cpp.strings.convert.convert_floats cimport ( is_float as cpp_is_float, ) @@ -27,4 +27,3 @@ def is_float(Column source_strings): )) return Column.from_unique_ptr(move(c_result)) - diff --git a/python/cudf/cudf/_lib/strings/convert/convert_integers.pyx b/python/cudf/cudf/_lib/strings/convert/convert_integers.pyx index 7ca5b4d4e0d..d1bae1edd37 100644 --- a/python/cudf/cudf/_lib/strings/convert/convert_integers.pyx +++ b/python/cudf/cudf/_lib/strings/convert/convert_integers.pyx @@ -8,7 +8,7 @@ from cudf._lib.cpp.column.column_view cimport column_view from cudf._lib.column cimport Column from cudf._lib.cpp.column.column cimport column -from cudf._lib.cpp.strings.convert cimport ( +from cudf._lib.cpp.strings.convert.convert_integers cimport ( is_integer as cpp_is_integer, ) @@ -27,4 +27,3 @@ def is_integer(Column source_strings): )) return Column.from_unique_ptr(move(c_result)) - From 2a38bd7793cfb9d95b02d988328a1156aa77098a Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 15 Mar 2021 18:41:03 -0600 
Subject: [PATCH 07/13] Another fix for python import path --- python/cudf/cudf/core/column/string.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py index 3c9e55420e5..95b83feaf30 100644 --- a/python/cudf/cudf/core/column/string.py +++ b/python/cudf/cudf/core/column/string.py @@ -75,10 +75,12 @@ is_space as cpp_isspace, is_upper as cpp_is_upper, ) -from cudf._lib.strings.convert import ( - is_float as cpp_is_float, +from cudf._lib.strings.convert.convert_integers import ( is_integer as cpp_is_integer, ) +from cudf._lib.strings.convert.convert_floats import ( + is_float as cpp_is_float, +) from cudf._lib.strings.combine import ( concatenate as cpp_concatenate, join as cpp_join, From 35f48501a1c8699f511c5f7b7833f0300c4754db Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 15 Mar 2021 18:45:29 -0600 Subject: [PATCH 08/13] One more fix for python import path --- python/cudf/cudf/core/tools/datetimes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cudf/cudf/core/tools/datetimes.py b/python/cudf/cudf/core/tools/datetimes.py index c5a4d95b4ef..ebf8a543db9 100644 --- a/python/cudf/cudf/core/tools/datetimes.py +++ b/python/cudf/cudf/core/tools/datetimes.py @@ -8,7 +8,7 @@ from pandas.core.tools.datetimes import _unit_map import cudf -from cudf._lib.strings.convert import is_integer as cpp_is_integer +from cudf._lib.strings.convert.convert_integers import is_integer as cpp_is_integer from cudf.core import column from cudf.core.index import as_index from cudf.utils.dtypes import is_scalar From 8f687973e736fc93292e9694ee0ca0c9bfde18ac Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 15 Mar 2021 18:55:52 -0600 Subject: [PATCH 09/13] Fix style check --- python/cudf/cudf/core/tools/datetimes.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/cudf/cudf/core/tools/datetimes.py 
b/python/cudf/cudf/core/tools/datetimes.py index ebf8a543db9..66b6560ea04 100644 --- a/python/cudf/cudf/core/tools/datetimes.py +++ b/python/cudf/cudf/core/tools/datetimes.py @@ -8,7 +8,8 @@ from pandas.core.tools.datetimes import _unit_map import cudf -from cudf._lib.strings.convert.convert_integers import is_integer as cpp_is_integer +from cudf._lib.strings.convert.convert_integers import is_integer\ + as cpp_is_integer from cudf.core import column from cudf.core.index import as_index from cudf.utils.dtypes import is_scalar From cda7d3b9acf4173bcee556ebe8bda55fcd346a6b Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 15 Mar 2021 19:13:39 -0600 Subject: [PATCH 10/13] Another fix for style check --- python/cudf/cudf/core/column/string.py | 13 ++++++++----- python/cudf/cudf/core/tools/datetimes.py | 11 +++++++---- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py index 95b83feaf30..12137ef8532 100644 --- a/python/cudf/cudf/core/column/string.py +++ b/python/cudf/cudf/core/column/string.py @@ -342,7 +342,9 @@ def byte_count(self) -> ParentType: 2 11 dtype: int32 """ - return self._return_or_inplace(cpp_count_bytes(self._column),) + return self._return_or_inplace( + cpp_count_bytes(self._column), + ) @overload def cat(self, sep: str = None, na_rep: str = None) -> str: @@ -444,7 +446,9 @@ def cat(self, others=None, sep=None, na_rep=None): if others is None: data = cpp_join( - self._column, cudf.Scalar(sep), cudf.Scalar(na_rep, "str"), + self._column, + cudf.Scalar(sep), + cudf.Scalar(na_rep, "str"), ) else: other_cols = _get_cols_list(self._parent, others) @@ -4506,7 +4510,7 @@ def is_consonant(self, position) -> ParentType: 0 True 1 False dtype: bool - """ + """ ltype = LetterType.CONSONANT if can_convert_to_column(position): @@ -4643,8 +4647,7 @@ def _expected_types_format(types): class StringColumn(column.ColumnBase): - """Implements operations for Columns of String 
type - """ + """Implements operations for Columns of String type""" _start_offset: Optional[int] _end_offset: Optional[int] diff --git a/python/cudf/cudf/core/tools/datetimes.py b/python/cudf/cudf/core/tools/datetimes.py index 66b6560ea04..1fa604cbc65 100644 --- a/python/cudf/cudf/core/tools/datetimes.py +++ b/python/cudf/cudf/core/tools/datetimes.py @@ -8,8 +8,9 @@ from pandas.core.tools.datetimes import _unit_map import cudf -from cudf._lib.strings.convert.convert_integers import is_integer\ - as cpp_is_integer +from cudf._lib.strings.convert.convert_integers import ( + is_integer as cpp_is_integer, +) from cudf.core import column from cudf.core.index import as_index from cudf.utils.dtypes import is_scalar @@ -315,12 +316,14 @@ def _process_col(col, unit, dayfirst, infer_datetime_format, format): else: if infer_datetime_format and format is None: format = column.datetime.infer_format( - element=col[0], dayfirst=dayfirst, + element=col[0], + dayfirst=dayfirst, ) elif format is None: format = column.datetime.infer_format(element=col[0]) col = col.as_datetime_column( - dtype=_unit_dtype_map[unit], format=format, + dtype=_unit_dtype_map[unit], + format=format, ) return col From dafda237b1d572ab54c0a328df0131f0cc2aa7dd Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 15 Mar 2021 20:07:16 -0600 Subject: [PATCH 11/13] Fix style check again --- python/cudf/cudf/core/column/string.py | 13 +++++-------- python/cudf/cudf/core/tools/datetimes.py | 6 ++---- 2 files changed, 7 insertions(+), 12 deletions(-) diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py index 12137ef8532..95b83feaf30 100644 --- a/python/cudf/cudf/core/column/string.py +++ b/python/cudf/cudf/core/column/string.py @@ -342,9 +342,7 @@ def byte_count(self) -> ParentType: 2 11 dtype: int32 """ - return self._return_or_inplace( - cpp_count_bytes(self._column), - ) + return self._return_or_inplace(cpp_count_bytes(self._column),) @overload def cat(self, sep: str = 
None, na_rep: str = None) -> str: @@ -446,9 +444,7 @@ def cat(self, others=None, sep=None, na_rep=None): if others is None: data = cpp_join( - self._column, - cudf.Scalar(sep), - cudf.Scalar(na_rep, "str"), + self._column, cudf.Scalar(sep), cudf.Scalar(na_rep, "str"), ) else: other_cols = _get_cols_list(self._parent, others) @@ -4510,7 +4506,7 @@ def is_consonant(self, position) -> ParentType: 0 True 1 False dtype: bool - """ + """ ltype = LetterType.CONSONANT if can_convert_to_column(position): @@ -4647,7 +4643,8 @@ def _expected_types_format(types): class StringColumn(column.ColumnBase): - """Implements operations for Columns of String type""" + """Implements operations for Columns of String type + """ _start_offset: Optional[int] _end_offset: Optional[int] diff --git a/python/cudf/cudf/core/tools/datetimes.py b/python/cudf/cudf/core/tools/datetimes.py index 1fa604cbc65..535e497e8dc 100644 --- a/python/cudf/cudf/core/tools/datetimes.py +++ b/python/cudf/cudf/core/tools/datetimes.py @@ -316,14 +316,12 @@ def _process_col(col, unit, dayfirst, infer_datetime_format, format): else: if infer_datetime_format and format is None: format = column.datetime.infer_format( - element=col[0], - dayfirst=dayfirst, + element=col[0], dayfirst=dayfirst, ) elif format is None: format = column.datetime.infer_format(element=col[0]) col = col.as_datetime_column( - dtype=_unit_dtype_map[unit], - format=format, + dtype=_unit_dtype_map[unit], format=format, ) return col From 1b2187e8c40b85e377fd2456660412e15da378de Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 15 Mar 2021 20:28:44 -0600 Subject: [PATCH 12/13] Fix style check again --- python/cudf/cudf/core/column/string.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py index 95b83feaf30..3e6db01ea66 100644 --- a/python/cudf/cudf/core/column/string.py +++ b/python/cudf/cudf/core/column/string.py @@ -78,9 +78,7 @@ from 
cudf._lib.strings.convert.convert_integers import ( is_integer as cpp_is_integer, ) -from cudf._lib.strings.convert.convert_floats import ( - is_float as cpp_is_float, -) +from cudf._lib.strings.convert.convert_floats import is_float as cpp_is_float from cudf._lib.strings.combine import ( concatenate as cpp_concatenate, join as cpp_join, From dae6d377a4701dde2a4bddd5575be14b637e6ede Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 17 Mar 2021 08:31:29 -0600 Subject: [PATCH 13/13] Update copyright year in headers --- cpp/include/cudf/strings/char_types/char_types.hpp | 2 +- cpp/include/cudf/strings/convert/convert_floats.hpp | 2 +- cpp/include/cudf/strings/convert/convert_integers.hpp | 2 +- cpp/src/strings/char_types/char_types.cu | 2 +- cpp/src/strings/convert/convert_integers.cu | 2 +- cpp/tests/strings/integers_tests.cu | 2 +- python/cudf/cudf/_lib/cpp/strings/char_types.pxd | 2 +- python/cudf/cudf/_lib/strings/char_types.pyx | 2 +- 8 files changed, 8 insertions(+), 8 deletions(-) diff --git a/cpp/include/cudf/strings/char_types/char_types.hpp b/cpp/include/cudf/strings/char_types/char_types.hpp index 07b3fba291c..1f5b6241850 100644 --- a/cpp/include/cudf/strings/char_types/char_types.hpp +++ b/cpp/include/cudf/strings/char_types/char_types.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/include/cudf/strings/convert/convert_floats.hpp b/cpp/include/cudf/strings/convert/convert_floats.hpp index 951c2615999..d1e00b36f6f 100644 --- a/cpp/include/cudf/strings/convert/convert_floats.hpp +++ b/cpp/include/cudf/strings/convert/convert_floats.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, NVIDIA CORPORATION. + * Copyright (c) 2021, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/include/cudf/strings/convert/convert_integers.hpp b/cpp/include/cudf/strings/convert/convert_integers.hpp index 2d77d51dd08..1e2fa80b129 100644 --- a/cpp/include/cudf/strings/convert/convert_integers.hpp +++ b/cpp/include/cudf/strings/convert/convert_integers.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, NVIDIA CORPORATION. + * Copyright (c) 2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/src/strings/char_types/char_types.cu b/cpp/src/strings/char_types/char_types.cu index db0319d3ef3..0b384ad0631 100644 --- a/cpp/src/strings/char_types/char_types.cu +++ b/cpp/src/strings/char_types/char_types.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/src/strings/convert/convert_integers.cu b/cpp/src/strings/convert/convert_integers.cu index 23b3c4e6763..5c5032b5c87 100644 --- a/cpp/src/strings/convert/convert_integers.cu +++ b/cpp/src/strings/convert/convert_integers.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/tests/strings/integers_tests.cu b/cpp/tests/strings/integers_tests.cu index 4d7037051cb..d6bf03b3f76 100644 --- a/cpp/tests/strings/integers_tests.cu +++ b/cpp/tests/strings/integers_tests.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/python/cudf/cudf/_lib/cpp/strings/char_types.pxd b/python/cudf/cudf/_lib/cpp/strings/char_types.pxd index 1ccb736c581..934269c6f25 100644 --- a/python/cudf/cudf/_lib/cpp/strings/char_types.pxd +++ b/python/cudf/cudf/_lib/cpp/strings/char_types.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2021, NVIDIA CORPORATION. from libcpp.memory cimport unique_ptr from cudf._lib.cpp.column.column_view cimport column_view diff --git a/python/cudf/cudf/_lib/strings/char_types.pyx b/python/cudf/cudf/_lib/strings/char_types.pyx index 70347daf5fb..1890e98f956 100644 --- a/python/cudf/cudf/_lib/strings/char_types.pyx +++ b/python/cudf/cudf/_lib/strings/char_types.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2021, NVIDIA CORPORATION. from libcpp cimport bool from libcpp.memory cimport unique_ptr