From 198c769104a2130ab2ab481808253969e57df56c Mon Sep 17 00:00:00 2001 From: David Wendt Date: Wed, 24 Nov 2021 14:09:30 -0500 Subject: [PATCH 1/3] Revert regex $/EOL end-of-string new-line special case handling --- cpp/src/strings/regex/regex.inl | 5 +---- cpp/tests/strings/contains_tests.cpp | 6 +++--- python/cudf/cudf/tests/test_string.py | 2 +- 3 files changed, 5 insertions(+), 8 deletions(-) diff --git a/cpp/src/strings/regex/regex.inl b/cpp/src/strings/regex/regex.inl index bc0679993d0..52cc69c69b8 100644 --- a/cpp/src/strings/regex/regex.inl +++ b/cpp/src/strings/regex/regex.inl @@ -276,10 +276,7 @@ __device__ inline int32_t reprog_device::regexec( } break; case EOL: - if (last_character || - (c == '\n' && (inst->u1.c == '$' || - // edge case where \n appears at the end of the string - pos + 1 == dstr.length()))) { + if (last_character || (c == '\n' && inst->u1.c == '$')) { id_activate = inst->u2.next_id; expanded = true; } diff --git a/cpp/tests/strings/contains_tests.cpp b/cpp/tests/strings/contains_tests.cpp index 229f9e4cc82..6f3896d7292 100644 --- a/cpp/tests/strings/contains_tests.cpp +++ b/cpp/tests/strings/contains_tests.cpp @@ -310,21 +310,21 @@ TEST_F(StringsContainsTests, MultiLine) auto expected_contains = cudf::test::fixed_width_column_wrapper({1, 1, 1, 0, 1}); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected_contains); results = cudf::strings::contains_re(view, "^abc$"); - expected_contains = cudf::test::fixed_width_column_wrapper({0, 0, 1, 0, 1}); + expected_contains = cudf::test::fixed_width_column_wrapper({0, 0, 1, 0, 0}); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected_contains); results = cudf::strings::matches_re(view, "^abc$", cudf::strings::regex_flags::MULTILINE); auto expected_matches = cudf::test::fixed_width_column_wrapper({1, 0, 1, 0, 1}); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected_matches); results = cudf::strings::matches_re(view, "^abc$"); - expected_matches = cudf::test::fixed_width_column_wrapper({0, 0, 1, 0, 1}); + expected_matches = cudf::test::fixed_width_column_wrapper({0, 0, 1, 0, 0}); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected_matches); results = cudf::strings::count_re(view, "^abc$", cudf::strings::regex_flags::MULTILINE); auto expected_count = cudf::test::fixed_width_column_wrapper({2, 1, 1, 0, 1}); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected_count); results = cudf::strings::count_re(view, "^abc$"); - expected_count = cudf::test::fixed_width_column_wrapper({0, 0, 1, 0, 1}); + expected_count = cudf::test::fixed_width_column_wrapper({0, 0, 1, 0, 0}); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected_count); } diff --git a/python/cudf/cudf/tests/test_string.py b/python/cudf/cudf/tests/test_string.py index cf52c4684c8..cc7be02a024 100644 --- a/python/cudf/cudf/tests/test_string.py +++ b/python/cudf/cudf/tests/test_string.py @@ -1746,7 +1746,7 @@ def test_string_wrap(data, width): ["A B", "1.5", "3,000"], ["23", "³", "⅕", ""], [" ", "\t\r\n ", ""], - ["$", "B", "Aab$", "$$ca", "C$B$", "cat", "cat\n"], + ["$", "B", "Aab$", "$$ca", "C$B$", "cat", "cat\ndog"], ["line\nto be wrapped", "another\nline\nto be wrapped"], ], ) From a96b7d5632fbd9680bcc92cb0bb898c5686df06d Mon Sep 17 00:00:00 2001 From: David Wendt Date: Tue, 30 Nov 2021 10:38:35 -0500 Subject: [PATCH 2/3] fix testIsFloat and testIsDouble --- .../java/ai/rapids/cudf/ColumnVectorTest.java | 23 ++++++++++--------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java index a582541a0d4..cf602c26717 100644 --- a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java +++ b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java @@ -4919,11 +4919,12 @@ void testIsFloat() { try (ColumnVector floatStringCV = ColumnVector.fromStrings(floatStrings); ColumnVector isFloat = floatStringCV.isFloat(); ColumnVector floats = floatStringCV.asFloats(); - ColumnVector expectedFloats = ColumnVector.fromBoxedFloats(0f, 0f, Float.POSITIVE_INFINITY, - Float.NEGATIVE_INFINITY, 0f, 0f, -0f, 0f, Float.MAX_VALUE, Float.POSITIVE_INFINITY, - -Float.MAX_VALUE, Float.NEGATIVE_INFINITY, 1.2e-24f, 0f, 0f, null, 423f); - ColumnVector expected = ColumnVector.fromBoxedBooleans(false, false, true, true, false, - false, true, true, true, true, true, true, true, false, false, null, true)) { + ColumnVector expectedFloats = ColumnVector.fromBoxedFloats(0f, Float.NaN, Float.POSITIVE_INFINITY, + Float.NEGATIVE_INFINITY, Float.POSITIVE_INFINITY, Float.POSITIVE_INFINITY, -0f, 0f, + Float.MAX_VALUE, Float.POSITIVE_INFINITY, -Float.MAX_VALUE, Float.NEGATIVE_INFINITY, + 1.2e-24f, 0f, 0f, null, 423f); + ColumnVector expected = ColumnVector.fromBoxedBooleans(false, true, true, true, true, + true, true, true, true, true, true, true, true, false, false, null, true)) { assertColumnsAreEqual(expected, isFloat); assertColumnsAreEqual(expectedFloats, floats); } @@ -4944,12 +4945,12 @@ void testIsDouble() { try (ColumnVector doubleStringCV = ColumnVector.fromStrings(doubleStrings); ColumnVector isDouble = doubleStringCV.isFloat(); ColumnVector doubles = doubleStringCV.asDoubles(); - ColumnVector expectedDoubles = ColumnVector.fromBoxedDoubles(0d, 0d, - Double.POSITIVE_INFINITY, Double.NEGATIVE_INFINITY, 0d, 0d, -0d, 0d, Double.MAX_VALUE, - Double.POSITIVE_INFINITY, -Double.MAX_VALUE, Double.NEGATIVE_INFINITY, 1.2e-234d, 0d, - 0d, null, 423d); - ColumnVector expected = ColumnVector.fromBoxedBooleans(false, false, true, true, false, - false, true, true, true, true, true, true, true, false, false, null, true)) { + ColumnVector expectedDoubles = ColumnVector.fromBoxedDoubles(0d, Double.NaN, + Double.POSITIVE_INFINITY, Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY, Double.POSITIVE_INFINITY, + -0d, 0d, Double.MAX_VALUE, Double.POSITIVE_INFINITY, -Double.MAX_VALUE, Double.NEGATIVE_INFINITY, + 1.2e-234d, 0d, 0d, null, 423d); + ColumnVector expected = ColumnVector.fromBoxedBooleans(false, true, true, true, true, + true, true, true, true, true, true, true, true, false, false, null, true)) { assertColumnsAreEqual(expected, isDouble); assertColumnsAreEqual(expectedDoubles, doubles); } From 208dd889cb5d0c16629025c4699b785941f4f2a3 Mon Sep 17 00:00:00 2001 From: David Wendt Date: Tue, 30 Nov 2021 10:41:12 -0500 Subject: [PATCH 3/3] revert last commit --- .../java/ai/rapids/cudf/ColumnVectorTest.java | 23 +++++++++---------- 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java index cf602c26717..a582541a0d4 100644 --- a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java +++ b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java @@ -4919,12 +4919,11 @@ void testIsFloat() { try (ColumnVector floatStringCV = ColumnVector.fromStrings(floatStrings); ColumnVector isFloat = floatStringCV.isFloat(); ColumnVector floats = floatStringCV.asFloats(); - ColumnVector expectedFloats = ColumnVector.fromBoxedFloats(0f, Float.NaN, Float.POSITIVE_INFINITY, - Float.NEGATIVE_INFINITY, Float.POSITIVE_INFINITY, Float.POSITIVE_INFINITY, -0f, 0f, - Float.MAX_VALUE, Float.POSITIVE_INFINITY, -Float.MAX_VALUE, Float.NEGATIVE_INFINITY, - 1.2e-24f, 0f, 0f, null, 423f); - ColumnVector expected = ColumnVector.fromBoxedBooleans(false, true, true, true, true, - true, true, true, true, true, true, true, true, false, false, null, true)) { + ColumnVector expectedFloats = ColumnVector.fromBoxedFloats(0f, 0f, Float.POSITIVE_INFINITY, + Float.NEGATIVE_INFINITY, 0f, 0f, -0f, 0f, Float.MAX_VALUE, Float.POSITIVE_INFINITY, + -Float.MAX_VALUE, Float.NEGATIVE_INFINITY, 1.2e-24f, 0f, 0f, null, 423f); + ColumnVector expected = ColumnVector.fromBoxedBooleans(false, false, true, true, false, + false, true, true, true, true, true, true, true, false, false, null, true)) { assertColumnsAreEqual(expected, isFloat); assertColumnsAreEqual(expectedFloats, floats); } @@ -4945,12 +4944,12 @@ void testIsDouble() { try (ColumnVector doubleStringCV = ColumnVector.fromStrings(doubleStrings); ColumnVector isDouble = doubleStringCV.isFloat(); ColumnVector doubles = doubleStringCV.asDoubles(); - ColumnVector expectedDoubles = ColumnVector.fromBoxedDoubles(0d, Double.NaN, - Double.POSITIVE_INFINITY, Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY, Double.POSITIVE_INFINITY, - -0d, 0d, Double.MAX_VALUE, Double.POSITIVE_INFINITY, -Double.MAX_VALUE, Double.NEGATIVE_INFINITY, - 1.2e-234d, 0d, 0d, null, 423d); - ColumnVector expected = ColumnVector.fromBoxedBooleans(false, true, true, true, true, - true, true, true, true, true, true, true, true, false, false, null, true)) { + ColumnVector expectedDoubles = ColumnVector.fromBoxedDoubles(0d, 0d, + Double.POSITIVE_INFINITY, Double.NEGATIVE_INFINITY, 0d, 0d, -0d, 0d, Double.MAX_VALUE, + Double.POSITIVE_INFINITY, -Double.MAX_VALUE, Double.NEGATIVE_INFINITY, 1.2e-234d, 0d, + 0d, null, 423d); + ColumnVector expected = ColumnVector.fromBoxedBooleans(false, false, true, true, false, + false, true, true, true, true, true, true, true, false, false, null, true)) { assertColumnsAreEqual(expected, isDouble); assertColumnsAreEqual(expectedDoubles, doubles); }