-
Notifications
You must be signed in to change notification settings - Fork 916
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
add is_valid_integer format check API #7094
Changes from 2 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
/* | ||
* Copyright (c) 2021, Baidu CORPORATION. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
#pragma once | ||
|
||
#include <cudf/column/column.hpp> | ||
#include <cudf/strings/strings_column_view.hpp> | ||
|
||
namespace cudf { | ||
namespace strings { | ||
/** | ||
* @addtogroup strings_convert | ||
* @{ | ||
* @file | ||
*/ | ||
|
||
/** | ||
* @brief Returns a boolean column identifying strings in which all characters are valid. | ||
* | ||
* Boolean variable `allow_decimal` indicates that whether we allow the input string data | ||
* is decimal, if `allow_decimal` is false, this function will check that the format is | ||
* [+-]?[0-9]+ like `is_integer`, or itll should check that it matches [+-]?[0-9]+(.[0-9]+) | ||
* similar to `is_float` but without some of the special cases for float (E, Inf, -Inf, NaN). | ||
* | ||
* input_type is used to check whether the data overflows, for example, if input_type is | ||
* `int8_t` and input string data is `128`, then it will return false ,because it out of ranges | ||
* [-128, 127] and overflows. | ||
* | ||
* @param strings Strings instance for this operation. | ||
* @param allow_decimal identification whether we allow the element is decimal or not. | ||
* @param input_type input data type for check overflow. | ||
* @param mr Device memory resource used to allocate the returned column's device memory. | ||
* @return New column of boolean results for each string. | ||
*/ | ||
std::unique_ptr<column> is_valid_element( | ||
strings_column_view const& strings, | ||
bool allow_decimal, | ||
data_type input_type, | ||
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); | ||
|
||
/** @} */ // end of doxygen group | ||
} // namespace strings | ||
} // namespace cudf | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
/* | ||
* Copyright (c) 2019, NVIDIA CORPORATION. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. copyright needs to be 2021, and I am not sure you want to give the copyright to NVIDIA for the code that you have written. You should probably include the copyright for the company you work for instead (NOTE: I am not a lawyer) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I fixed here to my company copyright , i don't know if this is appropriate , Are all the other contributors from NVIDIA ?What did they do |
||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
#pragma once | ||
|
||
#include <cudf/column/column.hpp> | ||
|
||
namespace cudf { | ||
namespace strings { | ||
/** | ||
* @addtogroup strings_convert | ||
* @{ | ||
* @file | ||
*/ | ||
|
||
/** | ||
* @brief Returns a boolean column identifying strings in which all | ||
* characters are valid for conversion to integers. | ||
* | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This description should include how the values are the decimal (if allowed) must all be valid decimal characters. |
||
* @param strings Strings instance for this operation. | ||
* @param allow_decimal identification whether the format is decimal or not | ||
* @param input_type input data type for check overflow | ||
* @param mr Device memory resource used to allocate the returned column's device memory. | ||
* @return New column of boolean results for each string. | ||
*/ | ||
std::unique_ptr<column> is_valid_fixed_point( | ||
strings_column_view const& strings, | ||
bool allow_decimal, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't understand the purpose of this parameter anymore. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The difference is There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I understood that much, I just don't understand why this is a desired option. In my mind this would be like having an
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is to match what Spark and Hive do when casting strings to int values. Floating point allows for There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
What behavior are you looking for? From my perspective, if There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. so , here we need 2 functions instead of 1 ? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, I think There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Based on what you have discussed, I think we should do this :
and , i will file a new pr for
@revans2 while, for parameter please give me some advice. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For CUDF JNI I would rather mirror the underlying C++ API as much as possible. The Spark plugin can decide which CUDF API to call instead of pushing that into cudf. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @jrhemstad I'm not sure |
||
data_type input_type, | ||
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); | ||
|
||
/** @} */ // end of doxygen group | ||
} // namespace strings | ||
} // namespace cudf | ||
|
Original file line number | Diff line number | Diff line change | ||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|
@@ -0,0 +1,203 @@ | ||||||||||||
/* | ||||||||||||
* Copyright (c) 2021, Baidu CORPORATION. | ||||||||||||
* | ||||||||||||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||||||||||||
* you may not use this file except in compliance with the License. | ||||||||||||
* You may obtain a copy of the License at | ||||||||||||
* | ||||||||||||
* http://www.apache.org/licenses/LICENSE-2.0 | ||||||||||||
* | ||||||||||||
* Unless required by applicable law or agreed to in writing, software | ||||||||||||
* distributed under the License is distributed on an "AS IS" BASIS, | ||||||||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||||||||||
* See the License for the specific language governing permissions and | ||||||||||||
* limitations under the License. | ||||||||||||
*/ | ||||||||||||
#include <cudf/column/column.hpp> | ||||||||||||
#include <cudf/column/column_device_view.cuh> | ||||||||||||
#include <cudf/column/column_factories.hpp> | ||||||||||||
#include <cudf/detail/null_mask.hpp> | ||||||||||||
#include <cudf/detail/nvtx/ranges.hpp> | ||||||||||||
#include <cudf/strings/detail/utilities.hpp> | ||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
This header is not required. |
||||||||||||
#include <cudf/strings/string_view.cuh> | ||||||||||||
#include <cudf/strings/strings_column_view.hpp> | ||||||||||||
#include <cudf/utilities/traits.hpp> | ||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't think |
||||||||||||
#include <cudf/utilities/type_dispatcher.hpp> | ||||||||||||
#include <strings/utilities.cuh> | ||||||||||||
#include <strings/utilities.hpp> | ||||||||||||
#include <strings/utilities.cuh> | ||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
These are not needed. |
||||||||||||
|
||||||||||||
#include <thrust/logical.h> | ||||||||||||
|
||||||||||||
namespace cudf { | ||||||||||||
namespace strings { | ||||||||||||
|
||||||||||||
/** | ||||||||||||
* Check whether the UTF8String is valid when convert data from string to all kinds of integers, | ||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||||||
* like INT8/16/32/64. for example, if allow_decimal is true, this will return `true, true` when input string | ||||||||||||
* is `1.23, 123`, or this function will return `false, true`, it means firt element is invalid, | ||||||||||||
* while second data is valid. | ||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||||||
* | ||||||||||||
* Note that, in this method we accumulate the result in negative format, and convert it to | ||||||||||||
* positive format at the end, if this string is not started with '-'. This is because min value | ||||||||||||
* is bigger than max value in digits, e.g. Long.MAX_VALUE is '9223372036854775807' and | ||||||||||||
* Long.MIN_VALUE is '-9223372036854775808'. | ||||||||||||
* | ||||||||||||
* This code is mostly copied from LazyLong.parseLong in Hive. | ||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Wouldn't we need to get permission to copy code from another source into our repo? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The description here is not very accurate , source code is java , while here is c++ , so I don't know if I need permission. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The license in both cases is the same Apache License. I am not a lawyer, so if you need to check with someone about this you can. I believe that taking code from one Apache Licensed project to use in another should not be an issue. You can take Apache Licensed code and use it in a proprietary application without releasing the source code. Giving credit for where the code came from is not required from the Apache license, but it is a nice thing to do. I personally think it would be more accurate to say that the code here is heavily based off of LazyLong.parseLong from Hive, but updated for C++. |
||||||||||||
* | ||||||||||||
* @param d_str String to check. | ||||||||||||
* @param allow_decimal whether we allow the data is Decimal type or not. | ||||||||||||
* @param min_value min_value that corresponds to the type that is checking. | ||||||||||||
* @return true if string has valid integer characters or decimal characters. | ||||||||||||
*/ | ||||||||||||
__device__ bool is_valid_element(string_view const& d_str, bool allow_decimal, long min_value) | ||||||||||||
{ | ||||||||||||
int offset = 0; | ||||||||||||
size_type bytes = d_str.size_bytes(); | ||||||||||||
const char* data = d_str.data(); | ||||||||||||
// strip leading white space | ||||||||||||
while (offset < bytes && data[offset] == ' ') ++offset; | ||||||||||||
if (offset == bytes) return false; | ||||||||||||
|
||||||||||||
int end = bytes - 1; | ||||||||||||
// strip trailing white space | ||||||||||||
while (end > offset && data[end] == ' ') --end; | ||||||||||||
|
||||||||||||
char c_sign = data[offset]; | ||||||||||||
const bool negative = c_sign == '-'; | ||||||||||||
if (negative || c_sign == '+'){ | ||||||||||||
if (end - offset == 0) return false; | ||||||||||||
++offset; | ||||||||||||
} | ||||||||||||
|
||||||||||||
const char separator = '.'; | ||||||||||||
const int radix = 10; | ||||||||||||
const long stop_value = min_value / radix; | ||||||||||||
long result = 0; | ||||||||||||
|
||||||||||||
while (offset <= end) { | ||||||||||||
const char c = data[offset]; | ||||||||||||
++offset; | ||||||||||||
// We allow decimals and will return a truncated integral in that case. | ||||||||||||
// Therefore we won't throw an exception here (checking the fractional | ||||||||||||
// part happens below). | ||||||||||||
if (c == separator && allow_decimal) break; | ||||||||||||
|
||||||||||||
int digit; | ||||||||||||
if (c >= '0' && c <= '9'){ | ||||||||||||
digit = c - '0'; | ||||||||||||
} else { | ||||||||||||
return false; | ||||||||||||
} | ||||||||||||
|
||||||||||||
// We are going to process the new digit and accumulate the result. However, | ||||||||||||
// before doing this, if the result is already smaller than the stopValue which is | ||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||||||
// (std::numeric_limits<data_type>::min() / radix), then result * 10 will definitely | ||||||||||||
// be smaller than minValue, and we can stop. | ||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||||||
if (result < stop_value) return false; | ||||||||||||
|
||||||||||||
result = result * radix - digit; | ||||||||||||
|
||||||||||||
// Since the previous result is less than or equal to stopValue which is | ||||||||||||
// (std::numeric_limits<data_type>::min() / radix), we can just use `result > 0` | ||||||||||||
// to check overflow. If result overflows, we should stop. | ||||||||||||
if (result > 0) return false; | ||||||||||||
} | ||||||||||||
// This is the case when we've encountered a decimal separator. The fractional | ||||||||||||
// part will not change the number, but we will verify that the fractional part | ||||||||||||
// is well formed. | ||||||||||||
while (offset <= end) { | ||||||||||||
char currentByte = data[offset]; | ||||||||||||
if (currentByte < '0' || currentByte > '9') return false; | ||||||||||||
++offset; | ||||||||||||
} | ||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This while-loop could probably be replaced with an algorithm like:
|
||||||||||||
|
||||||||||||
if (!negative) { | ||||||||||||
result = -result; | ||||||||||||
if (result < 0) return false; | ||||||||||||
} | ||||||||||||
|
||||||||||||
return true; | ||||||||||||
} | ||||||||||||
|
||||||||||||
namespace detail { | ||||||||||||
namespace { | ||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Move these two lines up so that it includes the |
||||||||||||
|
||||||||||||
/** | ||||||||||||
* @brief The dispatch functions for calculate the min value of input data type | ||||||||||||
* to check overflow. | ||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||||||
* | ||||||||||||
* The output is the min value of spicified type. | ||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||||||
*/ | ||||||||||||
struct min_value_of_type{ | ||||||||||||
template <typename T> | ||||||||||||
long operator()() | ||||||||||||
{ | ||||||||||||
CUDF_FAIL("Unsupported current data type check."); | ||||||||||||
} | ||||||||||||
}; | ||||||||||||
|
||||||||||||
template <> | ||||||||||||
long min_value_of_type::operator()<int8_t>() { return std::numeric_limits<int8_t>::min(); } | ||||||||||||
|
||||||||||||
template <> | ||||||||||||
long min_value_of_type::operator()<int16_t>() { return std::numeric_limits<int16_t>::min(); } | ||||||||||||
|
||||||||||||
template <> | ||||||||||||
long min_value_of_type::operator()<int32_t>() { return std::numeric_limits<int32_t>::min(); } | ||||||||||||
|
||||||||||||
template <> | ||||||||||||
long min_value_of_type::operator()<int64_t>() { return std::numeric_limits<int64_t>::min(); } | ||||||||||||
|
||||||||||||
} //namespace | ||||||||||||
|
||||||||||||
std::unique_ptr<column> is_valid_element( | ||||||||||||
strings_column_view const& strings, | ||||||||||||
bool allow_decimal, | ||||||||||||
data_type input_type, | ||||||||||||
rmm::cuda_stream_view stream, | ||||||||||||
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) | ||||||||||||
{ | ||||||||||||
auto strings_column = column_device_view::create(strings.parent(), stream); | ||||||||||||
auto d_column = *strings_column; | ||||||||||||
auto d_allow_decimal = allow_decimal; | ||||||||||||
|
||||||||||||
// ready a min_value corresponds to the input type in order to check overflow | ||||||||||||
long d_min_value = cudf::type_dispatcher(input_type, min_value_of_type{}) ; | ||||||||||||
|
||||||||||||
// create output column | ||||||||||||
auto results = make_numeric_column(data_type{type_id::BOOL8}, | ||||||||||||
strings.size(), | ||||||||||||
cudf::detail::copy_bitmask(strings.parent(), stream, mr), | ||||||||||||
strings.null_count(), | ||||||||||||
stream, | ||||||||||||
mr); | ||||||||||||
auto d_results = results->mutable_view().data<bool>(); | ||||||||||||
thrust::transform(rmm::exec_policy(stream), | ||||||||||||
thrust::make_counting_iterator<size_type>(0), | ||||||||||||
thrust::make_counting_iterator<size_type>(strings.size()), | ||||||||||||
d_results, | ||||||||||||
[d_column,d_allow_decimal,d_min_value] __device__(size_type idx) { | ||||||||||||
if (d_column.is_null(idx)) return false; | ||||||||||||
return strings::is_valid_element(d_column.element<string_view>(idx), d_allow_decimal, d_min_value); | ||||||||||||
}); | ||||||||||||
results->set_null_count(strings.null_count()); | ||||||||||||
return results; | ||||||||||||
} | ||||||||||||
|
||||||||||||
} // namespace detail | ||||||||||||
|
||||||||||||
// external API | ||||||||||||
|
||||||||||||
std::unique_ptr<column> is_valid_element(strings_column_view const& strings, | ||||||||||||
bool allow_decimal, | ||||||||||||
data_type input_type, | ||||||||||||
rmm::mr::device_memory_resource* mr) | ||||||||||||
{ | ||||||||||||
CUDF_FUNC_RANGE(); | ||||||||||||
return detail::is_valid_element(strings, allow_decimal, input_type, rmm::cuda_stream_default, mr); | ||||||||||||
} | ||||||||||||
|
||||||||||||
} // namespace strings | ||||||||||||
} // namespace cudf | ||||||||||||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -14,10 +14,14 @@ | |
* limitations under the License. | ||
*/ | ||
|
||
#include <cudf/strings/convert/is_valid_element.hpp> | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I would suggest moving these to a new test source file |
||
#include <cudf/strings/strings_column_view.hpp> | ||
|
||
#include <tests/strings/utilities.h> | ||
#include <cudf/column/column.hpp> | ||
#include <cudf/strings/char_types/char_types.hpp> | ||
#include <cudf/strings/strings_column_view.hpp> | ||
#include <cudf/types.hpp> | ||
#include <cudf_test/base_fixture.hpp> | ||
#include <cudf_test/column_utilities.hpp> | ||
#include <cudf_test/column_wrapper.hpp> | ||
|
@@ -243,6 +247,67 @@ TEST_F(StringsCharsTest, Integers) | |
EXPECT_TRUE(cudf::strings::all_integer(cudf::strings_column_view(strings2))); | ||
} | ||
|
||
TEST_F(StringsCharsTest, ValidFixedPoint) | ||
{ | ||
// allow_decimal = true | ||
cudf::test::strings_column_wrapper strings1( | ||
{"+175", "-34", "9.8", "17+2", "+-14", "1234567890", "67de", "", "1e10", "-", "++", "", "21474836482222"}); | ||
auto results = cudf::strings::is_valid_element(cudf::strings_column_view(strings1), true, cudf::data_type{cudf::type_id::INT8}); | ||
cudf::test::fixed_width_column_wrapper<bool> expected1({0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}); | ||
CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected1); | ||
|
||
cudf::test::strings_column_wrapper strings2( | ||
{"+175", "-34", "9.8", "17+2", "+-14", "1234567890", "67de", "", "1e10", "-", "++", "", "21474836482222"}); | ||
results = cudf::strings::is_valid_element(cudf::strings_column_view(strings2), true, cudf::data_type{cudf::type_id::INT16}); | ||
cudf::test::fixed_width_column_wrapper<bool> expected2({1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}); | ||
CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected2); | ||
|
||
cudf::test::strings_column_wrapper strings3( | ||
{"+175", "-34", "9.8", "17+2", "+-14", "1234567890", "67de", "", "1e10", "-", "++", "", "21474836482222"}); | ||
results = cudf::strings::is_valid_element(cudf::strings_column_view(strings3), true, cudf::data_type{cudf::type_id::INT32}); | ||
cudf::test::fixed_width_column_wrapper<bool> expected3({1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0}); | ||
CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected3); | ||
|
||
cudf::test::strings_column_wrapper strings4( | ||
{"+175", "-34", "9.8", "17+2", "+-14", "1234567890", "67de", "", "1e10", "-", "++", "", "21474836482222"}); | ||
results = cudf::strings::is_valid_element(cudf::strings_column_view(strings4), true, cudf::data_type{cudf::type_id::INT64}); | ||
cudf::test::fixed_width_column_wrapper<bool> expected4({1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1}); | ||
CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected4); | ||
|
||
// allow_decimal = false | ||
cudf::test::strings_column_wrapper strings5( | ||
{"+175", "-34", "9.8", "17+2", "+-14", "1234567890", "67de", "", "1e10", "-", "++", "", "21474836482222"}); | ||
results = cudf::strings::is_valid_element(cudf::strings_column_view(strings5), false, cudf::data_type{cudf::type_id::INT8}); | ||
cudf::test::fixed_width_column_wrapper<bool> expected5({0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}); | ||
CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected5); | ||
|
||
cudf::test::strings_column_wrapper strings6( | ||
{"+175", "-34", "9.8", "17+2", "+-14", "1234567890", "67de", "", "1e10", "-", "++", "", "21474836482222"}); | ||
results = cudf::strings::is_valid_element(cudf::strings_column_view(strings6), false, cudf::data_type{cudf::type_id::INT16}); | ||
cudf::test::fixed_width_column_wrapper<bool> expected6({1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}); | ||
CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected6); | ||
|
||
cudf::test::strings_column_wrapper strings7( | ||
{"+175", "-34", "9.8", "17+2", "+-14", "1234567890", "67de", "", "1e10", "-", "++", "", "21474836482222"}); | ||
results = cudf::strings::is_valid_element(cudf::strings_column_view(strings7), false, cudf::data_type{cudf::type_id::INT32}); | ||
cudf::test::fixed_width_column_wrapper<bool> expected7({1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0}); | ||
CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected7); | ||
|
||
cudf::test::strings_column_wrapper strings8( | ||
{"+175", "-34", "9.8", "17+2", "+-14", "1234567890", "67de", "", "1e10", "-", "++", "", "21474836482222"}); | ||
results = cudf::strings::is_valid_element(cudf::strings_column_view(strings8), false, cudf::data_type{cudf::type_id::INT64}); | ||
cudf::test::fixed_width_column_wrapper<bool> expected8({1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1}); | ||
CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected8); | ||
|
||
// second test | ||
cudf::test::strings_column_wrapper strings0( | ||
{"0", "+0", "-0", "1234567890", "-27341132", "+012", "023", "-045", "-1.1", "+1000.1"}); | ||
results = cudf::strings::is_valid_element(cudf::strings_column_view(strings0), true, cudf::data_type{cudf::type_id::INT64}); | ||
cudf::test::fixed_width_column_wrapper<bool> expected0({1, 1, 1, 1, 1, 1, 1, 1, 1, 1}); | ||
CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected0); | ||
|
||
} | ||
|
||
TEST_F(StringsCharsTest, Floats) | ||
{ | ||
cudf::test::strings_column_wrapper strings1({"+175", | ||
|
@@ -390,3 +455,4 @@ TEST_F(StringsCharsTest, EmptyStringsColumn) | |
EXPECT_EQ(cudf::type_id::STRING, results->view().type().id()); | ||
EXPECT_EQ(0, results->view().size()); | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.