Skip to content

Commit

Permalink
More time ops (NVIDIA#263)
Browse files Browse the repository at this point in the history
* weekday

* dayofweek

* last_day

* dayofyear

* quarter
  • Loading branch information
revans2 authored Jun 24, 2020
1 parent 5ed2e6d commit 13c6ec5
Show file tree
Hide file tree
Showing 4 changed files with 120 additions and 1 deletion.
5 changes: 5 additions & 0 deletions docs/configs.md
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,8 @@ Name | Description | Default Value | Incompatibilities
<a name="sql.expression.DateDiff"></a>spark.rapids.sql.expression.DateDiff|datediff|true|None|
<a name="sql.expression.DateSub"></a>spark.rapids.sql.expression.DateSub|Returns the date that is num_days before start_date|true|None|
<a name="sql.expression.DayOfMonth"></a>spark.rapids.sql.expression.DayOfMonth|get the day of the month from a date or timestamp|true|None|
<a name="sql.expression.DayOfWeek"></a>spark.rapids.sql.expression.DayOfWeek|Returns the day of the week (1 = Sunday...7=Saturday)|true|None|
<a name="sql.expression.DayOfYear"></a>spark.rapids.sql.expression.DayOfYear|get the day of the year from a date or timestamp|true|None|
<a name="sql.expression.Divide"></a>spark.rapids.sql.expression.Divide|division|true|None|
<a name="sql.expression.EndsWith"></a>spark.rapids.sql.expression.EndsWith|Ends With|true|None|
<a name="sql.expression.EqualNullSafe"></a>spark.rapids.sql.expression.EqualNullSafe|check if the values are equal including nulls <=>|true|None|
Expand All @@ -141,6 +143,7 @@ Name | Description | Default Value | Incompatibilities
<a name="sql.expression.IsNotNull"></a>spark.rapids.sql.expression.IsNotNull|checks if a value is not null|true|None|
<a name="sql.expression.IsNull"></a>spark.rapids.sql.expression.IsNull|checks if a value is null|true|None|
<a name="sql.expression.KnownFloatingPointNormalized"></a>spark.rapids.sql.expression.KnownFloatingPointNormalized|tag to prevent redundant normalization|true|None|
<a name="sql.expression.LastDay"></a>spark.rapids.sql.expression.LastDay|Returns the last day of the month which the date belongs to|true|None|
<a name="sql.expression.Length"></a>spark.rapids.sql.expression.Length|String Character Length|true|None|
<a name="sql.expression.LessThan"></a>spark.rapids.sql.expression.LessThan|< operator|true|None|
<a name="sql.expression.LessThanOrEqual"></a>spark.rapids.sql.expression.LessThanOrEqual|<= operator|true|None|
Expand All @@ -161,6 +164,7 @@ Name | Description | Default Value | Incompatibilities
<a name="sql.expression.Or"></a>spark.rapids.sql.expression.Or|logical or|true|None|
<a name="sql.expression.Pmod"></a>spark.rapids.sql.expression.Pmod|pmod|true|None|
<a name="sql.expression.Pow"></a>spark.rapids.sql.expression.Pow|lhs ^ rhs|true|None|
<a name="sql.expression.Quarter"></a>spark.rapids.sql.expression.Quarter|returns the quarter of the year for date, in the range 1 to 4.|true|None|
<a name="sql.expression.Rand"></a>spark.rapids.sql.expression.Rand|Generate a random column with i.i.d. uniformly distributed values in [0, 1)|true|None|
<a name="sql.expression.RegExpReplace"></a>spark.rapids.sql.expression.RegExpReplace|RegExpReplace|true|None|
<a name="sql.expression.Remainder"></a>spark.rapids.sql.expression.Remainder|remainder or modulo|true|None|
Expand Down Expand Up @@ -198,6 +202,7 @@ Name | Description | Default Value | Incompatibilities
<a name="sql.expression.UnboundedPreceding$"></a>spark.rapids.sql.expression.UnboundedPreceding$|Special boundary for a window frame, indicating all rows preceding the current row|true|None|
<a name="sql.expression.UnixTimestamp"></a>spark.rapids.sql.expression.UnixTimestamp|Returns the UNIX timestamp of current or specified time|false|This is not 100% compatible with the Spark version because Incorrectly formatted strings and bogus dates produce garbage data instead of null|
<a name="sql.expression.Upper"></a>spark.rapids.sql.expression.Upper|String uppercase operator|false|This is not 100% compatible with the Spark version because in some cases unicode characters change byte width when changing the case. The GPU string conversion does not support these characters. For a full list of unsupported characters see https://github.com/rapidsai/cudf/issues/3132|
<a name="sql.expression.WeekDay"></a>spark.rapids.sql.expression.WeekDay|Returns the day of the week (0 = Monday...6=Sunday)|true|None|
<a name="sql.expression.WindowExpression"></a>spark.rapids.sql.expression.WindowExpression|calculates a return value for every input row of a table based on a group (or "window") of rows|true|None|
<a name="sql.expression.WindowSpecDefinition"></a>spark.rapids.sql.expression.WindowSpecDefinition|specification of a window function, indicating the partitioning-expression, the row ordering, and the width of the window|true|None|
<a name="sql.expression.Year"></a>spark.rapids.sql.expression.Year|get the year from a date or timestamp|true|None|
Expand Down
21 changes: 21 additions & 0 deletions integration_tests/src/main/python/date_time_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,22 @@ def test_second():
assert_gpu_and_cpu_are_equal_collect(
lambda spark : unary_op_df(spark, timestamp_gen).selectExpr('second(a)'))

def test_quarter():
    # The GPU evaluation of SQL quarter(a) must return exactly the same rows
    # as Spark's CPU implementation for the standard date generator.
    def build_frame(spark):
        return unary_op_df(spark, date_gen).selectExpr('quarter(a)')
    assert_gpu_and_cpu_are_equal_collect(build_frame)

def test_weekday():
    # GPU weekday(a) (Monday = 0) must match the CPU result row-for-row.
    def build_frame(spark):
        return unary_op_df(spark, date_gen).selectExpr('weekday(a)')
    assert_gpu_and_cpu_are_equal_collect(build_frame)

def test_dayofweek():
    # GPU dayofweek(a) (Sunday = 1) must match the CPU result row-for-row.
    def build_frame(spark):
        return unary_op_df(spark, date_gen).selectExpr('dayofweek(a)')
    assert_gpu_and_cpu_are_equal_collect(build_frame)

def test_last_day():
    # GPU last_day(a) (last date of the input's month) must match the CPU result.
    def build_frame(spark):
        return unary_op_df(spark, date_gen).selectExpr('last_day(a)')
    assert_gpu_and_cpu_are_equal_collect(build_frame)

# We have to set the upper/lower limit on IntegerGen so the date_add doesn't overflow
# Python uses proleptic gregorian date which extends Gregorian calendar as it always existed and
# always exist in future. When performing date_sub('0001-01-01', 1), it will blow up because python
Expand Down Expand Up @@ -119,6 +135,11 @@ def test_dayofmonth(data_gen):
assert_gpu_and_cpu_are_equal_collect(
lambda spark : unary_op_df(spark, data_gen).select(f.dayofmonth(f.col('a'))))

@pytest.mark.parametrize('data_gen', date_gens, ids=idfn)
def test_dayofyear(data_gen):
    # GPU dayofyear must agree with the CPU for every configured date generator.
    def build_frame(spark):
        return unary_op_df(spark, data_gen).select(f.dayofyear(f.col('a')))
    assert_gpu_and_cpu_are_equal_collect(build_frame)

@incompat #Really only the string is
@pytest.mark.parametrize('data_gen', date_n_time_gens, ids=idfn)
def test_unix_timestamp(data_gen):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -580,11 +580,21 @@ object GpuOverrides {
(a, conf, p, r) => new UnaryExprMeta[Month](a, conf, p, r) {
override def convertToGpu(child: Expression): GpuExpression = GpuMonth(child)
}),
// Registers the GPU replacement for Spark's Quarter expression; the actual
// device computation lives in GpuQuarter ((month + 2) / 3).
expr[Quarter](
"returns the quarter of the year for date, in the range 1 to 4.",
(a, conf, p, r) => new UnaryExprMeta[Quarter](a, conf, p, r) {
override def convertToGpu(child: Expression): GpuExpression = GpuQuarter(child)
}),
// Registers the GPU replacement for DayOfMonth, backed by GpuDayOfMonth
// (cudf `day` extraction).
expr[DayOfMonth](
"get the day of the month from a date or timestamp",
(a, conf, p, r) => new UnaryExprMeta[DayOfMonth](a, conf, p, r) {
override def convertToGpu(child: Expression): GpuExpression = GpuDayOfMonth(child)
}),
// Registers the GPU replacement for DayOfYear, backed by GpuDayOfYear
// (cudf `dayOfYear` extraction).
expr[DayOfYear](
"get the day of the year from a date or timestamp",
(a, conf, p, r) => new UnaryExprMeta[DayOfYear](a, conf, p, r) {
override def convertToGpu(child: Expression): GpuExpression = GpuDayOfYear(child)
}),
expr[Abs](
"absolute value",
(a, conf, p, r) => new UnaryExprMeta[Abs](a, conf, p, r) {
Expand Down Expand Up @@ -941,6 +951,24 @@ object GpuOverrides {
override def convertToGpu(expr: Expression): GpuExpression =
GpuSecond(expr)
}),
// Registers the GPU replacement for Spark's WeekDay (Monday = 0 numbering);
// GpuWeekDay shifts cudf's Monday = 1 result down by one.
expr[WeekDay](
"Returns the day of the week (0 = Monday...6=Sunday)",
(a, conf, p, r) => new UnaryExprMeta[WeekDay](a, conf, p, r) {
override def convertToGpu(expr: Expression): GpuExpression =
GpuWeekDay(expr)
}),
// Registers the GPU replacement for DayOfWeek (Sunday = 1 numbering);
// GpuDayOfWeek shifts the day count before extracting the day of the week.
expr[DayOfWeek](
"Returns the day of the week (1 = Sunday...7=Saturday)",
(a, conf, p, r) => new UnaryExprMeta[DayOfWeek](a, conf, p, r) {
override def convertToGpu(expr: Expression): GpuExpression =
GpuDayOfWeek(expr)
}),
// Registers the GPU replacement for LastDay, backed by GpuLastDay
// (cudf `lastDayOfMonth`).
expr[LastDay](
"Returns the last day of the month which the date belongs to",
(a, conf, p, r) => new UnaryExprMeta[LastDay](a, conf, p, r) {
override def convertToGpu(expr: Expression): GpuExpression =
GpuLastDay(expr)
}),
expr[FromUnixTime](
"get the String from a unix timestamp",
(a, conf, p, r) => new UnixTimeExprMeta[FromUnixTime](a, conf, p, r) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,37 @@ trait GpuTimeUnaryExpression extends GpuUnaryExpression with TimeZoneAwareExpres
override lazy val resolved: Boolean = childrenResolved && checkInputDataTypes().isSuccess
}

/**
 * GPU implementation of Spark's `weekday`: day of the week numbered
 * Monday = 0 through Sunday = 6.
 */
case class GpuWeekDay(child: Expression)
    extends GpuDateUnaryExpression {

  override protected def doColumnar(input: GpuColumnVector): GpuColumnVector = {
    // cudf numbers days starting at Monday = 1, while Spark's weekday wants
    // Monday = 0, so subtract one from the cudf result.
    withResource(input.getBase.weekDay()) { cudfDayOfWeek =>
      withResource(Scalar.fromShort(1.toShort)) { one =>
        GpuColumnVector.from(cudfDayOfWeek.sub(one))
      }
    }
  }
}

/**
 * GPU implementation of Spark's `dayofweek`: day of the week numbered
 * Sunday = 1 through Saturday = 7.
 */
case class GpuDayOfWeek(child: Expression)
    extends GpuDateUnaryExpression {

  override protected def doColumnar(input: GpuColumnVector): GpuColumnVector = {
    // cudf numbers days with Monday = 1, but Spark wants Sunday = 1. Adding
    // one to the underlying day count before asking for the day of the week
    // converts cudf's numbering into Spark's.
    val shifted = withResource(input.getBase.asInts()) { days =>
      withResource(Scalar.fromInt(1)) { one =>
        days.add(one)
      }
    }
    withResource(shifted) { shifted =>
      withResource(shifted.asTimestampDays()) { shiftedDays =>
        GpuColumnVector.from(shiftedDays.weekDay())
      }
    }
  }
}

case class GpuMinute(child: Expression, timeZoneId: Option[String] = None)
extends GpuTimeUnaryExpression {

Expand Down Expand Up @@ -188,6 +219,21 @@ case class GpuDateDiff(endDate: Expression, startDate: Expression)
}
}

/**
 * GPU implementation of Spark's `quarter`: the quarter of the year the input
 * date falls in, in the range 1 to 4.
 */
case class GpuQuarter(child: Expression) extends GpuDateUnaryExpression {
  override def doColumnar(input: GpuColumnVector): GpuColumnVector = {
    // Integer math: (month + 2) / 3 maps months 1-3 -> 1, 4-6 -> 2,
    // 7-9 -> 3, and 10-12 -> 4.
    val monthPlusTwo = withResource(input.getBase.month()) { month =>
      withResource(Scalar.fromInt(2)) { two =>
        month.add(two)
      }
    }
    withResource(monthPlusTwo) { monthPlusTwo =>
      withResource(Scalar.fromInt(3)) { three =>
        GpuColumnVector.from(monthPlusTwo.div(three))
      }
    }
  }
}

case class GpuMonth(child: Expression) extends GpuDateUnaryExpression {
override def doColumnar(input: GpuColumnVector): GpuColumnVector =
GpuColumnVector.from(input.getBase.month())
Expand All @@ -198,6 +244,11 @@ case class GpuDayOfMonth(child: Expression) extends GpuDateUnaryExpression {
GpuColumnVector.from(input.getBase.day())
}

/** GPU implementation of Spark's `dayofyear`, delegating to cudf's dayOfYear kernel. */
case class GpuDayOfYear(child: Expression) extends GpuDateUnaryExpression {
  override def doColumnar(input: GpuColumnVector): GpuColumnVector = {
    GpuColumnVector.from(input.getBase.dayOfYear())
  }
}

abstract class UnixTimeExprMeta[A <: BinaryExpression with TimeZoneAwareExpression]
(expr: A, conf: RapidsConf,
parent: Option[RapidsMeta[_, _, _]],
Expand Down Expand Up @@ -463,4 +514,18 @@ case class GpuDateAdd(startDate: Expression, days: Expression) extends GpuDateMa
override def prettyName: String = "date_add"

override def binaryOp: BinaryOp = BinaryOp.ADD
}
}

/**
 * GPU implementation of Spark's `last_day`: for each input date, the last
 * day of the month that date falls in. Input and output are both DateType.
 */
case class GpuLastDay(startDate: Expression)
    extends GpuUnaryExpression with ImplicitCastInputTypes {

  override def child: Expression = startDate

  override def inputTypes: Seq[AbstractDataType] = Seq(DateType)

  override def dataType: DataType = DateType

  override def prettyName: String = "last_day"

  override protected def doColumnar(input: GpuColumnVector): GpuColumnVector = {
    // cudf provides a dedicated kernel for this, so it is a single call.
    GpuColumnVector.from(input.getBase.lastDayOfMonth())
  }
}

0 comments on commit 13c6ec5

Please sign in to comment.