diff --git a/docs/configs.md b/docs/configs.md index bf6f8bf9824..e726c36ec2e 100644 --- a/docs/configs.md +++ b/docs/configs.md @@ -118,6 +118,8 @@ Name | Description | Default Value | Incompatibilities spark.rapids.sql.expression.DateDiff|datediff|true|None| spark.rapids.sql.expression.DateSub|Returns the date that is num_days before start_date|true|None| spark.rapids.sql.expression.DayOfMonth|get the day of the month from a date or timestamp|true|None| +spark.rapids.sql.expression.DayOfWeek|Returns the day of the week (1 = Sunday...7=Saturday)|true|None| +spark.rapids.sql.expression.DayOfYear|get the day of the year from a date or timestamp|true|None| spark.rapids.sql.expression.Divide|division|true|None| spark.rapids.sql.expression.EndsWith|Ends With|true|None| spark.rapids.sql.expression.EqualNullSafe|check if the values are equal including nulls <=>|true|None| @@ -141,6 +143,7 @@ Name | Description | Default Value | Incompatibilities spark.rapids.sql.expression.IsNotNull|checks if a value is not null|true|None| spark.rapids.sql.expression.IsNull|checks if a value is null|true|None| spark.rapids.sql.expression.KnownFloatingPointNormalized|tag to prevent redundant normalization|true|None| +spark.rapids.sql.expression.LastDay|Returns the last day of the month which the date belongs to|true|None| spark.rapids.sql.expression.Length|String Character Length|true|None| spark.rapids.sql.expression.LessThan|< operator|true|None| spark.rapids.sql.expression.LessThanOrEqual|<= operator|true|None| @@ -161,6 +164,7 @@ Name | Description | Default Value | Incompatibilities spark.rapids.sql.expression.Or|logical or|true|None| spark.rapids.sql.expression.Pmod|pmod|true|None| spark.rapids.sql.expression.Pow|lhs ^ rhs|true|None| +spark.rapids.sql.expression.Quarter|returns the quarter of the year for date, in the range 1 to 4.|true|None| spark.rapids.sql.expression.Rand|Generate a random column with i.i.d. 
uniformly distributed values in [0, 1)|true|None| spark.rapids.sql.expression.RegExpReplace|RegExpReplace|true|None| spark.rapids.sql.expression.Remainder|remainder or modulo|true|None| @@ -198,6 +202,7 @@ Name | Description | Default Value | Incompatibilities spark.rapids.sql.expression.UnboundedPreceding$|Special boundary for a window frame, indicating all rows preceding the current row|true|None| spark.rapids.sql.expression.UnixTimestamp|Returns the UNIX timestamp of current or specified time|false|This is not 100% compatible with the Spark version because Incorrectly formatted strings and bogus dates produce garbage data instead of null| spark.rapids.sql.expression.Upper|String uppercase operator|false|This is not 100% compatible with the Spark version because in some cases unicode characters change byte width when changing the case. The GPU string conversion does not support these characters. For a full list of unsupported characters see https://github.com/rapidsai/cudf/issues/3132| +spark.rapids.sql.expression.WeekDay|Returns the day of the week (0 = Monday...6=Sunday)|true|None| spark.rapids.sql.expression.WindowExpression|calculates a return value for every input row of a table based on a group (or "window") of rows|true|None| spark.rapids.sql.expression.WindowSpecDefinition|specification of a window function, indicating the partitioning-expression, the row ordering, and the width of the window|true|None| spark.rapids.sql.expression.Year|get the year from a date or timestamp|true|None| diff --git a/integration_tests/src/main/python/date_time_test.py b/integration_tests/src/main/python/date_time_test.py index c0c3cf55659..3b881531fb1 100644 --- a/integration_tests/src/main/python/date_time_test.py +++ b/integration_tests/src/main/python/date_time_test.py @@ -43,6 +43,22 @@ def test_second(): assert_gpu_and_cpu_are_equal_collect( lambda spark : unary_op_df(spark, timestamp_gen).selectExpr('second(a)')) +def test_quarter(): + 
assert_gpu_and_cpu_are_equal_collect( + lambda spark : unary_op_df(spark, date_gen).selectExpr('quarter(a)')) + +def test_weekday(): + assert_gpu_and_cpu_are_equal_collect( + lambda spark : unary_op_df(spark, date_gen).selectExpr('weekday(a)')) + +def test_dayofweek(): + assert_gpu_and_cpu_are_equal_collect( + lambda spark : unary_op_df(spark, date_gen).selectExpr('dayofweek(a)')) + +def test_last_day(): + assert_gpu_and_cpu_are_equal_collect( + lambda spark : unary_op_df(spark, date_gen).selectExpr('last_day(a)')) + # We have to set the upper/lower limit on IntegerGen so the date_add doesn't overflow # Python uses proleptic gregorian date which extends Gregorian calendar as it always existed and # always exist in future. When performing date_sub('0001-01-01', 1), it will blow up because python @@ -119,6 +135,11 @@ def test_dayofmonth(data_gen): assert_gpu_and_cpu_are_equal_collect( lambda spark : unary_op_df(spark, data_gen).select(f.dayofmonth(f.col('a')))) +@pytest.mark.parametrize('data_gen', date_gens, ids=idfn) +def test_dayofyear(data_gen): + assert_gpu_and_cpu_are_equal_collect( + lambda spark : unary_op_df(spark, data_gen).select(f.dayofyear(f.col('a')))) + @incompat #Really only the string is @pytest.mark.parametrize('data_gen', date_n_time_gens, ids=idfn) def test_unix_timestamp(data_gen): diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOverrides.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOverrides.scala index bb546d36c55..fad61a1f0f6 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOverrides.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOverrides.scala @@ -580,11 +580,21 @@ object GpuOverrides { (a, conf, p, r) => new UnaryExprMeta[Month](a, conf, p, r) { override def convertToGpu(child: Expression): GpuExpression = GpuMonth(child) }), + expr[Quarter]( + "returns the quarter of the year for date, in the range 1 to 4.", + (a, conf, p, r) => new UnaryExprMeta[Quarter](a, conf, p, r) { 
+ override def convertToGpu(child: Expression): GpuExpression = GpuQuarter(child) + }), expr[DayOfMonth]( "get the day of the month from a date or timestamp", (a, conf, p, r) => new UnaryExprMeta[DayOfMonth](a, conf, p, r) { override def convertToGpu(child: Expression): GpuExpression = GpuDayOfMonth(child) }), + expr[DayOfYear]( + "get the day of the year from a date or timestamp", + (a, conf, p, r) => new UnaryExprMeta[DayOfYear](a, conf, p, r) { + override def convertToGpu(child: Expression): GpuExpression = GpuDayOfYear(child) + }), expr[Abs]( "absolute value", (a, conf, p, r) => new UnaryExprMeta[Abs](a, conf, p, r) { @@ -941,6 +951,24 @@ object GpuOverrides { override def convertToGpu(expr: Expression): GpuExpression = GpuSecond(expr) }), + expr[WeekDay]( + "Returns the day of the week (0 = Monday...6=Sunday)", + (a, conf, p, r) => new UnaryExprMeta[WeekDay](a, conf, p, r) { + override def convertToGpu(expr: Expression): GpuExpression = + GpuWeekDay(expr) + }), + expr[DayOfWeek]( + "Returns the day of the week (1 = Sunday...7=Saturday)", + (a, conf, p, r) => new UnaryExprMeta[DayOfWeek](a, conf, p, r) { + override def convertToGpu(expr: Expression): GpuExpression = + GpuDayOfWeek(expr) + }), + expr[LastDay]( + "Returns the last day of the month which the date belongs to", + (a, conf, p, r) => new UnaryExprMeta[LastDay](a, conf, p, r) { + override def convertToGpu(expr: Expression): GpuExpression = + GpuLastDay(expr) + }), expr[FromUnixTime]( "get the String from a unix timestamp", (a, conf, p, r) => new UnixTimeExprMeta[FromUnixTime](a, conf, p, r) { diff --git a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/datetimeExpressions.scala b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/datetimeExpressions.scala index 84cb05dbc05..51a17d7715f 100644 --- a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/datetimeExpressions.scala +++ b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/datetimeExpressions.scala @@ -48,6 +48,37 @@ trait 
GpuTimeUnaryExpression extends GpuUnaryExpression with TimeZoneAwareExpres override lazy val resolved: Boolean = childrenResolved && checkInputDataTypes().isSuccess } +/** GPU weekday: subtracts a short scalar 1 from the column's weekDay() result so that Monday = 0 (the body notes that CUDF reports Monday = 1). */ case class GpuWeekDay(child: Expression) + extends GpuDateUnaryExpression { + + override protected def doColumnar(input: GpuColumnVector): GpuColumnVector = { + withResource(Scalar.fromShort(1.toShort)) { one => + withResource(input.getBase.weekDay()) { weekday => // We want Monday = 0, CUDF Monday = 1 + GpuColumnVector.from(weekday.sub(one)) + } + } + } +} + +/** GPU dayofweek with Sunday = 1: views the dates as ints, adds 1 (one day), converts back to timestamp days and only then calls weekDay(), which the body notes reports Monday = 1. */ case class GpuDayOfWeek(child: Expression) + extends GpuDateUnaryExpression { + + override protected def doColumnar(input: GpuColumnVector): GpuColumnVector = { + // Cudf returns Monday = 1, ... + // We want Sunday = 1, ..., so add a day before we extract the day of the week + val nextInts = withResource(Scalar.fromInt(1)) { one => + withResource(input.getBase.asInts()) { ints => + ints.add(one) + } + } + withResource(nextInts) { nextInts => + withResource(nextInts.asTimestampDays()) { daysAgain => + GpuColumnVector.from(daysAgain.weekDay()) + } + } + } +} + case class GpuMinute(child: Expression, timeZoneId: Option[String] = None) extends GpuTimeUnaryExpression { @@ -188,6 +219,21 @@ case class GpuDateDiff(endDate: Expression, startDate: Expression) } } +/** GPU quarter: computes (month() + 2) / 3 on the input column. NOTE(review): this yields 1 to 4 only if div performs integer division for these operands - confirm against the cudf binary-op rules. */ case class GpuQuarter(child: Expression) extends GpuDateUnaryExpression { + override def doColumnar(input: GpuColumnVector): GpuColumnVector = { + val tmp = withResource(Scalar.fromInt(2)) { two => + withResource(input.getBase.month()) { month => + month.add(two) + } + } + withResource(tmp) { tmp => + withResource(Scalar.fromInt(3)) { three => + GpuColumnVector.from(tmp.div(three)) + } + } + } +} + case class GpuMonth(child: Expression) extends GpuDateUnaryExpression { override def doColumnar(input: GpuColumnVector): GpuColumnVector = GpuColumnVector.from(input.getBase.month()) @@ -198,6 +244,11 @@ case class GpuDayOfMonth(child: Expression) extends GpuDateUnaryExpression { 
GpuColumnVector.from(input.getBase.day()) } +/** GPU dayofyear: wraps the column returned by dayOfYear() on the input's base column. */ case class GpuDayOfYear(child: Expression) extends GpuDateUnaryExpression { + override def doColumnar(input: GpuColumnVector): GpuColumnVector = + GpuColumnVector.from(input.getBase.dayOfYear()) +} + abstract class UnixTimeExprMeta[A <: BinaryExpression with TimeZoneAwareExpression] (expr: A, conf: RapidsConf, parent: Option[RapidsMeta[_, _, _]], @@ -463,4 +514,18 @@ case class GpuDateAdd(startDate: Expression, days: Expression) extends GpuDateMa override def prettyName: String = "date_add" override def binaryOp: BinaryOp = BinaryOp.ADD -} \ No newline at end of file +} + +/** GPU last_day: the single child is startDate, its declared input type is DateType (ImplicitCastInputTypes is mixed in), and the result is the DateType column returned by lastDayOfMonth(). */ case class GpuLastDay(startDate: Expression) + extends GpuUnaryExpression with ImplicitCastInputTypes { + override def child: Expression = startDate + + override def inputTypes: Seq[AbstractDataType] = Seq(DateType) + + override def dataType: DataType = DateType + + override def prettyName: String = "last_day" + + override protected def doColumnar(input: GpuColumnVector): GpuColumnVector = + GpuColumnVector.from(input.getBase.lastDayOfMonth()) +}