Add support for more date formats and remove incompat from to_unix_timestamp

Signed-off-by: Andy Grove <[email protected]>
andygrove committed Nov 17, 2020
1 parent 9021145 commit 94bfbf9
Showing 4 changed files with 34 additions and 13 deletions.
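For context, a minimal PySpark sketch of what this commit enables — parsing a slash-separated pattern on the GPU. The DataFrame and session setup are illustrative, not taken from the tests below:

    from pyspark.sql import SparkSession

    spark = SparkSession.builder.getOrCreate()

    # 'yyyy/MM' is one of the formats this commit adds to the GPU-compatible
    # list; previously only patterns such as 'yyyy-MM-dd' were supported.
    df = spark.createDataFrame([("1975/06",), ("1999/12",)], ["a"])
    df.selectExpr("to_unix_timestamp(a, 'yyyy/MM')").show()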
19 changes: 9 additions & 10 deletions integration_tests/src/main/python/date_time_test.py
@@ -160,46 +160,45 @@ def test_dayofyear(data_gen):
assert_gpu_and_cpu_are_equal_collect(
lambda spark : unary_op_df(spark, data_gen).select(f.dayofyear(f.col('a'))))

@incompat #Really only the string is
@pytest.mark.parametrize('data_gen', date_n_time_gens, ids=idfn)
def test_unix_timestamp(data_gen):
assert_gpu_and_cpu_are_equal_collect(
lambda spark : unary_op_df(spark, data_gen).select(f.unix_timestamp(f.col('a'))))

@incompat #Really only the string is
@pytest.mark.parametrize('data_gen', date_n_time_gens, ids=idfn)
def test_to_unix_timestamp(data_gen):
assert_gpu_and_cpu_are_equal_collect(
lambda spark : unary_op_df(spark, data_gen).selectExpr("to_unix_timestamp(a)"))

@incompat #Really only the string is
@pytest.mark.parametrize('data_gen', date_n_time_gens, ids=idfn)
def test_unix_timestamp_improved(data_gen):
conf = {"spark.rapids.sql.improvedTimeOps.enabled": "true"}
conf = {"spark.rapids.sql.improvedTimeOps.enabled": "true",
"spark.sql.legacy.timeParserPolicy": "EXCEPTION"}
assert_gpu_and_cpu_are_equal_collect(
lambda spark : unary_op_df(spark, data_gen).select(f.unix_timestamp(f.col('a'))), conf)

@incompat #Really only the string is
@pytest.mark.parametrize('data_gen', date_n_time_gens, ids=idfn)
def test_to_unix_timestamp_improved(data_gen):
conf = {"spark.rapids.sql.improvedTimeOps.enabled": "true"}
conf = {"spark.rapids.sql.improvedTimeOps.enabled": "true",
"spark.sql.legacy.timeParserPolicy": "EXCEPTION"}
assert_gpu_and_cpu_are_equal_collect(
lambda spark : unary_op_df(spark, data_gen).selectExpr("to_unix_timestamp(a)"), conf)

str_date_and_format_gen = [pytest.param(StringGen('[0-9]{4}/[01][0-9]'),'yyyy/MM', marks=pytest.mark.xfail(reason="cudf does no checks")),
(StringGen('[0-9]{4}/[01][12]/[0-2][1-8]'),'yyyy/MM/dd'),
(ConvertGen(DateGen(nullable=False), lambda d: d.strftime('%Y/%m').zfill(7), data_type=StringType()), 'yyyy/MM')]

@incompat
@pytest.mark.parametrize('data_gen,date_form', str_date_and_format_gen, ids=idfn)
def test_string_to_unix_timestamp(data_gen, date_form):
print("date: " + date_form)
print("test_string_to_unix_timestamp date_form: {}".format(date_form))
conf = {"spark.rapids.sql.improvedTimeOps.enabled": "true",
"spark.sql.legacy.timeParserPolicy": "EXCEPTION"}
assert_gpu_and_cpu_are_equal_collect(
lambda spark : unary_op_df(spark, data_gen, seed=1).selectExpr("to_unix_timestamp(a, '{}')".format(date_form)))
lambda spark : unary_op_df(spark, data_gen, seed=1).selectExpr("to_unix_timestamp(a, '{}')".format(date_form)), conf)

@incompat
@pytest.mark.parametrize('data_gen,date_form', str_date_and_format_gen, ids=idfn)
def test_string_unix_timestamp(data_gen, date_form):
print("test_string_unix_timestamp date_form: {}".format(date_form))
assert_gpu_and_cpu_are_equal_collect(
lambda spark : unary_op_df(spark, data_gen, seed=1).select(f.unix_timestamp(f.col('a'), date_form)))

@@ -1075,9 +1075,7 @@ object GpuOverrides {
GpuToUnixTimestamp(lhs, rhs, sparkFormat, strfFormat)
}
}
})
.incompat("Incorrectly formatted strings and bogus dates produce garbage data" +
" instead of null"),
}),
expr[UnixTimestamp](
"Returns the UNIX timestamp of current or specified time",
(a, conf, p, r) => new UnixTimeExprMeta[UnixTimestamp](a, conf, p, r){
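Removing the .incompat(...) call above means GpuToUnixTimestamp no longer has to be explicitly enabled as an incompatible operation. As a sketch of what that opt-in looked like — my reading of the @incompat marker in the tests above; the flag name is the plugin's incompatible-ops setting, not stated in this diff:

    # Before this commit, GPU execution of to_unix_timestamp required an
    # opt-in, because malformed strings produced garbage data instead of null:
    conf = {"spark.rapids.sql.incompatibleOps.enabled": "true"}
    # After this commit, to_unix_timestamp runs on the GPU without that flag.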
@@ -327,6 +327,9 @@ object GpuToTimestamp {
/** We are compatible with Spark for these formats */
val COMPATIBLE_FORMATS = Seq(
"yyyy-MM-dd",
"yyyy-MM",
"yyyy/MM/dd",
"yyyy/MM",
"dd/MM/yyyy",
"yyyy-MM-dd HH:mm:ss"
)
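Since the comment above says these formats are Spark-compatible, a quick way to sanity-check any of the listed patterns is to run them against the CPU directly. A minimal sketch mirroring the Python tests; the data and session setup are illustrative:

    from pyspark.sql import SparkSession

    spark = SparkSession.builder.getOrCreate()
    spark.conf.set("spark.sql.legacy.timeParserPolicy", "CORRECTED")

    rows = [("1999-12-31",), ("1999/12/31",), ("1999-12",), ("1999/12",)]
    df = spark.createDataFrame(rows, ["a"])

    # Each pattern below is in COMPATIBLE_FORMATS after this commit; rows
    # that do not match a given pattern come back as null under CORRECTED.
    for fmt in ["yyyy-MM-dd", "yyyy/MM/dd", "yyyy-MM", "yyyy/MM"]:
        df.selectExpr("to_unix_timestamp(a, '{}')".format(fmt)).show()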
@@ -47,6 +47,21 @@ class ParseDateTimeSuite extends SparkQueryCompareTestSuite {
df => df.withColumn("c1", unix_timestamp(col("c0"), "yyyy-MM-dd"))
}

testSparkResultsAreEqual("unix_timestamp parse yyyy/MM",
timestampsAsStrings,
new SparkConf().set(SQLConf.LEGACY_TIME_PARSER_POLICY.key, "CORRECTED")) {
df => df.withColumn("c1", unix_timestamp(col("c0"), "yyyy/MM"))
}

testSparkResultsAreEqual("to_unix_timestamp parse yyyy/MM",
timestampsAsStrings,
new SparkConf().set(SQLConf.LEGACY_TIME_PARSER_POLICY.key, "CORRECTED")) {
df => {
df.createOrReplaceTempView("df")
df.sqlContext.sql("SELECT c0, to_unix_timestamp(c0, 'yyyy/MM') FROM df")
}
}

testSparkResultsAreEqual("unix_timestamp parse timestamp",
timestampsAsStrings,
new SparkConf().set(SQLConf.LEGACY_TIME_PARSER_POLICY.key, "CORRECTED")) {
@@ -128,6 +143,12 @@
"31/12/1999",
"31/12/1999 11:59:59.999",
"1999-12-31",
"1999/12/31",
"1999-12",
"1999/12",
"1975/06",
"1975/06/18",
"1975/06/18 06:48:57",
"1999-12-31\n",
"\t1999-12-31",
"\n1999-12-31",
