Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Replace toTitle with capitalize for GpuInitCap #2838

Merged
merged 15 commits into from
Jul 7, 2021
13 changes: 1 addition & 12 deletions integration_tests/src/main/python/string_test.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2020, NVIDIA CORPORATION.
# Copyright (c) 2020-2021, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -314,9 +314,6 @@ def test_length():
'CHAR_LENGTH(a)',
'CHARACTER_LENGTH(a)'))

# Once the xfail is fixed this can replace test_initcap_space
@incompat
@pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/120')
def test_initcap():
jlowe marked this conversation as resolved.
Show resolved Hide resolved
# Because we don't use the same Unicode version we need to limit
# the character set to something more reasonable
Expand All @@ -327,14 +324,6 @@ def test_initcap():
lambda spark: unary_op_df(spark, gen).select(
f.initcap(f.col('a'))))

@incompat
def test_initcap_space():
# we see a lot more space delim
gen = StringGen('([aAbB]{0,5}[ ]{1,2}){1,5}')
assert_gpu_and_cpu_are_equal_collect(
lambda spark: unary_op_df(spark, gen).select(
f.initcap(f.col('a'))))

@pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/119')
def test_like_null_xfail():
gen = mk_str_gen('.{0,3}a[|b*.$\r\n]{0,2}c.{0,3}')\
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1250,10 +1250,7 @@ object GpuOverrides {
ExprChecks.unaryProjectNotLambdaInputMatchesOutput(TypeSig.STRING, TypeSig.STRING),
(a, conf, p, r) => new UnaryExprMeta[InitCap](a, conf, p, r) {
override def convertToGpu(child: Expression): GpuExpression = GpuInitCap(child)
}).incompat(CASE_MODIFICATION_INCOMPAT + " Spark also only sees the space character as " +
"a word deliminator, but this will capitalize any character after a non-alphabetic " +
"character. The behavior will be aligned to match Spark in the future per " +
"https://github.com/NVIDIA/spark-rapids/issues/2786."),
}).incompat(CASE_MODIFICATION_INCOMPAT),
jlowe marked this conversation as resolved.
Show resolved Hide resolved
expr[Log](
"Natural log",
ExprChecks.mathUnary,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -481,7 +481,10 @@ case class GpuInitCap(child: Expression) extends GpuUnaryExpression with Implici
override def inputTypes: Seq[DataType] = Seq(StringType)
override def dataType: DataType = StringType
override protected def doColumnar(input: GpuColumnVector): ColumnVector =
input.getBase.toTitle
withResource(Scalar.fromString(" ")) { space =>
// Spark only sees the space character as a word delimiter.
input.getBase.capitalize(space)
}
}

case class GpuStringReplace(
Expand Down