BooleanType test shouldn't xfail #639

Merged (10 commits) on Sep 4, 2020
10 changes: 5 additions & 5 deletions integration_tests/src/main/python/arithmetic_ops_test.py
@@ -18,7 +18,7 @@
from data_gen import *
from marks import incompat, approximate_float
from pyspark.sql.types import *
-from spark_session import with_spark_session
+from spark_session import with_spark_session, is_before_spark_310
import pyspark.sql.functions as f

@pytest.mark.parametrize('data_gen', numeric_gens, ids=idfn)
@@ -360,7 +360,7 @@ def test_expm1(data_gen):
lambda spark : unary_op_df(spark, data_gen).selectExpr('expm1(a)'))

@pytest.mark.xfail(
-    condition=with_spark_session(lambda spark : not(spark.sparkContext.version < "3.1.0")),
+    condition=not(is_before_spark_310()),
reason='https://issues.apache.org/jira/browse/SPARK-32640')
@approximate_float
@pytest.mark.parametrize('data_gen', double_gens, ids=idfn)
@@ -369,7 +369,7 @@ def test_log(data_gen):
lambda spark : unary_op_df(spark, data_gen).selectExpr('log(a)'))

@pytest.mark.xfail(
-    condition=with_spark_session(lambda spark : not(spark.sparkContext.version < "3.1.0")),
+    condition=not(is_before_spark_310()),
reason='https://issues.apache.org/jira/browse/SPARK-32640')
@approximate_float
@pytest.mark.parametrize('data_gen', double_gens, ids=idfn)
@@ -378,7 +378,7 @@ def test_log1p(data_gen):
lambda spark : unary_op_df(spark, data_gen).selectExpr('log1p(a)'))

@pytest.mark.xfail(
-    condition=with_spark_session(lambda spark : not(spark.sparkContext.version < "3.1.0")),
+    condition=not(is_before_spark_310()),
reason='https://issues.apache.org/jira/browse/SPARK-32640')
@approximate_float
@pytest.mark.parametrize('data_gen', double_gens, ids=idfn)
@@ -387,7 +387,7 @@ def test_log2(data_gen):
lambda spark : unary_op_df(spark, data_gen).selectExpr('log2(a)'))

@pytest.mark.xfail(
-    condition=with_spark_session(lambda spark : not(spark.sparkContext.version < "3.1.0")),
+    condition=not(is_before_spark_310()),
reason='https://issues.apache.org/jira/browse/SPARK-32640')
@approximate_float
@pytest.mark.parametrize('data_gen', double_gens, ids=idfn)
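For reference, the recurring change in this file reduces to the pattern below -- a minimal sketch with a hypothetical test name, not code from this PR. The condition is evaluated once at collection time, so the test is expected to fail only when running against Spark 3.1.0 or later.

    # Minimal sketch of the new xfail pattern (hypothetical test).
    import pytest
    from spark_session import is_before_spark_310

    @pytest.mark.xfail(
        condition=not(is_before_spark_310()),
        reason='https://issues.apache.org/jira/browse/SPARK-32640')
    def test_example_unary_op():
        pass  # the real tests run the expression on CPU and GPU and compare results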
22 changes: 11 additions & 11 deletions integration_tests/src/main/python/cache_test.py
@@ -18,7 +18,7 @@
from data_gen import *
from datetime import date
import pyspark.sql.functions as f
-from spark_session import with_cpu_session, with_gpu_session
+from spark_session import with_cpu_session, with_gpu_session, is_spark_300
from join_test import create_df
from generate_expr_test import four_op_df
from marks import incompat, allow_non_gpu, ignore_order
@@ -61,8 +61,8 @@ def test_passing_gpuExpr_as_Expr():
@pytest.mark.parametrize('join_type', ['Left', 'Right', 'Inner', 'LeftSemi', 'LeftAnti'], ids=idfn)
@ignore_order
def test_cache_join(data_gen, join_type):
-    if data_gen.data_type == BooleanType():
-        pytest.xfail("https://github.com/NVIDIA/spark-rapids/issues/350")
+    if is_spark_300() and data_gen.data_type == BooleanType():
+        pytest.xfail("https://issues.apache.org/jira/browse/SPARK-32672")

def do_join(spark):
left, right = create_df(spark, data_gen, 500, 500)
@@ -81,8 +81,8 @@ def do_join(spark):
@ignore_order
def test_cached_join_filter(data_gen, join_type):
data, filter = data_gen
-    if data.data_type == BooleanType():
-        pytest.xfail("https://github.com/NVIDIA/spark-rapids/issues/350")
+    if is_spark_300() and data.data_type == BooleanType():
+        pytest.xfail("https://issues.apache.org/jira/browse/SPARK-32672")

def do_join(spark):
left, right = create_df(spark, data, 500, 500)
@@ -96,8 +96,8 @@ def do_join(spark):
@pytest.mark.parametrize('join_type', ['Left', 'Right', 'Inner', 'LeftSemi', 'LeftAnti'], ids=idfn)
@ignore_order
def test_cache_broadcast_hash_join(data_gen, join_type):
-    if data_gen.data_type == BooleanType():
-        pytest.xfail("https://github.com/NVIDIA/spark-rapids/issues/350")
+    if is_spark_300() and data_gen.data_type == BooleanType():
+        pytest.xfail("https://issues.apache.org/jira/browse/SPARK-32672")

def do_join(spark):
left, right = create_df(spark, data_gen, 500, 500)
@@ -116,8 +116,8 @@ def do_join(spark):
@pytest.mark.parametrize('join_type', ['Left', 'Right', 'Inner', 'LeftSemi', 'LeftAnti'], ids=idfn)
@ignore_order
def test_cache_shuffled_hash_join(data_gen, join_type):
-    if data_gen.data_type == BooleanType():
-        pytest.xfail("https://github.com/NVIDIA/spark-rapids/issues/350")
+    if is_spark_300() and data_gen.data_type == BooleanType():
+        pytest.xfail("https://issues.apache.org/jira/browse/SPARK-32672")

def do_join(spark):
left, right = create_df(spark, data_gen, 50, 500)
@@ -151,8 +151,8 @@ def do_join(spark):
@pytest.mark.parametrize('data_gen', all_gen_restricting_dates, ids=idfn)
@allow_non_gpu('InMemoryTableScanExec', 'DataWritingCommandExec')
def test_cache_posexplode_makearray(spark_tmp_path, data_gen):
-    if data_gen.data_type == BooleanType():
-        pytest.xfail("https://github.com/NVIDIA/spark-rapids/issues/350")
+    if is_spark_300() and data_gen.data_type == BooleanType():
+        pytest.xfail("https://issues.apache.org/jira/browse/SPARK-32672")
data_path_cpu = spark_tmp_path + '/PARQUET_DATA_CPU'
data_path_gpu = spark_tmp_path + '/PARQUET_DATA_GPU'
def write_posExplode(data_path):
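All of the cache_test.py changes above narrow the same guard: the BooleanType xfail now fires only on Spark 3.0.0, the release that the referenced SPARK-32672 still affects. A sketch of that guard, factored into a hypothetical helper (the PR itself inlines the check in each test):

    # Sketch of the narrowed guard; the helper name is hypothetical.
    import pytest
    from pyspark.sql.types import BooleanType
    from spark_session import is_spark_300

    def xfail_boolean_cache_if_needed(data_gen):
        if is_spark_300() and data_gen.data_type == BooleanType():
            pytest.xfail("https://issues.apache.org/jira/browse/SPARK-32672")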
4 changes: 2 additions & 2 deletions integration_tests/src/main/python/date_time_test.py
@@ -18,14 +18,14 @@
from datetime import date, datetime, timezone
from marks import incompat
from pyspark.sql.types import *
-from spark_session import with_spark_session
+from spark_session import with_spark_session, is_before_spark_310
import pyspark.sql.functions as f

# We only support literal intervals for TimeSub
vals = [(-584, 1563), (1943, 1101), (2693, 2167), (2729, 0), (44, 1534), (2635, 3319),
(1885, -2828), (0, 2463), (932, 2286), (0, 0)]
@pytest.mark.xfail(
-    condition=with_spark_session(lambda spark : not(spark.sparkContext.version < "3.1.0")),
+    condition=not(is_before_spark_310()),
reason='https://issues.apache.org/jira/browse/SPARK-32640')
@pytest.mark.parametrize('data_gen', vals, ids=idfn)
def test_timesub(data_gen):
4 changes: 2 additions & 2 deletions integration_tests/src/main/python/hash_aggregate_test.py
@@ -19,7 +19,7 @@
from pyspark.sql.types import *
from marks import *
import pyspark.sql.functions as f
-from spark_session import with_spark_session
+from spark_session import with_spark_session, is_spark_300

_no_nans_float_conf = {'spark.rapids.sql.variableFloatAgg.enabled': 'true',
'spark.rapids.sql.hasNans': 'false',
@@ -316,7 +316,7 @@ def test_hash_agg_with_nan_keys(data_gen):


@pytest.mark.xfail(
-    condition=with_spark_session(lambda spark : spark.sparkContext.version == "3.0.0"),
+    condition=with_spark_session(lambda spark : is_spark_300()),
reason="[SPARK-32038][SQL] NormalizeFloatingNumbers should also work on distinct aggregate "
"(https://github.com/apache/spark/pull/28876) "
"Fixed in later Apache Spark releases.")
4 changes: 2 additions & 2 deletions integration_tests/src/main/python/join_test.py
@@ -18,7 +18,7 @@
from conftest import is_databricks_runtime
from data_gen import *
from marks import ignore_order, allow_non_gpu, incompat
-from spark_session import with_spark_session
+from spark_session import with_spark_session, is_before_spark_310

all_gen = [StringGen(), ByteGen(), ShortGen(), IntegerGen(), LongGen(),
BooleanGen(), DateGen(), TimestampGen(),
@@ -152,7 +152,7 @@ def do_join(spark):
@ignore_order
@pytest.mark.parametrize('join_type', ['Left', 'Right', 'Inner', 'LeftSemi', 'LeftAnti',
pytest.param('FullOuter', marks=pytest.mark.xfail(
-        condition=with_spark_session(lambda spark : not(spark.sparkContext.version < "3.1.0")),
+        condition=not(is_before_spark_310()),
reason='https://github.com/NVIDIA/spark-rapids/issues/575')),
'Cross'], ids=idfn)
def test_broadcast_join_mixed(join_type):
4 changes: 2 additions & 2 deletions integration_tests/src/main/python/orc_test.py
@@ -19,7 +19,7 @@
from data_gen import *
from marks import *
from pyspark.sql.types import *
-from spark_session import with_cpu_session, with_spark_session
+from spark_session import with_cpu_session, with_spark_session, is_before_spark_310

def read_orc_df(data_path):
return lambda spark : spark.read.orc(data_path)
@@ -200,7 +200,7 @@ def test_compress_write_round_trip(spark_tmp_path, compress):
conf={'spark.sql.orc.compression.codec': compress})

@pytest.mark.xfail(
-    condition=with_spark_session(lambda spark : not(spark.sparkContext.version < "3.1.0")),
+    condition=not(is_before_spark_310()),
reason='https://github.com/NVIDIA/spark-rapids/issues/576')
def test_input_meta(spark_tmp_path):
first_data_path = spark_tmp_path + '/ORC_DATA/key=0'
22 changes: 11 additions & 11 deletions integration_tests/src/main/python/qa_nightly_sql.py
@@ -13,7 +13,7 @@
# limitations under the License.

from conftest import is_databricks_runtime
-from spark_session import with_spark_session
+from spark_session import with_spark_session, is_before_spark_310
import pytest

SELECT_SQL = [
@@ -745,16 +745,16 @@
("SELECT test_table.strF as strF, test_table1.strF as strF1 from test_table RIGHT JOIN test_table1 ON test_table.strF=test_table1.strF", "test_table.strF, test_table1.strF RIGHT JOIN test_table1 ON test_table.strF=test_table1.strF"),
("SELECT test_table.dateF as dateF, test_table1.dateF as dateF1 from test_table RIGHT JOIN test_table1 ON test_table.dateF=test_table1.dateF", "test_table.dateF, test_table1.dateF RIGHT JOIN test_table1 ON test_table.dateF=test_table1.dateF"),
("SELECT test_table.timestampF as timestampF, test_table1.timestampF as timestampF1 from test_table RIGHT JOIN test_table1 ON test_table.timestampF=test_table1.timestampF", "test_table.timestampF, test_table1.timestampF RIGHT JOIN test_table1 ON test_table.timestampF=test_table1.timestampF"),
pytest.param(("SELECT test_table.byteF as byteF, test_table1.byteF as byteF1 from test_table FULL JOIN test_table1 ON test_table.byteF=test_table1.byteF", "test_table.byteF, test_table1.byteF FULL JOIN test_table1 ON test_table.byteF=test_table1.byteF"), marks=pytest.mark.xfail(condition=with_spark_session(lambda spark : not(spark.sparkContext.version < "3.1.0")), reason='https://github.com/NVIDIA/spark-rapids/issues/578')),
pytest.param(("SELECT test_table.shortF as shortF, test_table1.shortF as shortF1 from test_table FULL JOIN test_table1 ON test_table.shortF=test_table1.shortF", "test_table.shortF, test_table1.shortF FULL JOIN test_table1 ON test_table.shortF=test_table1.shortF"), marks=pytest.mark.xfail(condition=with_spark_session(lambda spark : not(spark.sparkContext.version < "3.1.0")), reason='https://github.com/NVIDIA/spark-rapids/issues/578')),
pytest.param(("SELECT test_table.intF as intF, test_table1.intF as intF1 from test_table FULL JOIN test_table1 ON test_table.intF=test_table1.intF", "test_table.intF, test_table1.intF FULL JOIN test_table1 ON test_table.intF=test_table1.intF"), marks=pytest.mark.xfail(condition=with_spark_session(lambda spark : not(spark.sparkContext.version < "3.1.0")), reason='https://github.com/NVIDIA/spark-rapids/issues/578')),
pytest.param(("SELECT test_table.longF as longF, test_table1.longF as longF1 from test_table FULL JOIN test_table1 ON test_table.longF=test_table1.longF", "test_table.longF, test_table1.longF FULL JOIN test_table1 ON test_table.longF=test_table1.longF"), marks=pytest.mark.xfail(condition=with_spark_session(lambda spark : not(spark.sparkContext.version < "3.1.0")), reason='https://github.com/NVIDIA/spark-rapids/issues/578')),
pytest.param(("SELECT test_table.floatF as floatF, test_table1.floatF as floatF1 from test_table FULL JOIN test_table1 ON test_table.floatF=test_table1.floatF", "test_table.floatF, test_table1.floatF FULL JOIN test_table1 ON test_table.floatF=test_table1.floatF"), marks=pytest.mark.xfail(condition=with_spark_session(lambda spark : not(spark.sparkContext.version < "3.1.0")), reason='https://github.com/NVIDIA/spark-rapids/issues/578')),
pytest.param(("SELECT test_table.doubleF as doubleF, test_table1.doubleF as doubleF1 from test_table FULL JOIN test_table1 ON test_table.doubleF=test_table1.doubleF", "test_table.doubleF, test_table1.doubleF FULL JOIN test_table1 ON test_table.doubleF=test_table1.doubleF"), marks=pytest.mark.xfail(condition=with_spark_session(lambda spark : not(spark.sparkContext.version < "3.1.0")), reason='https://github.com/NVIDIA/spark-rapids/issues/578')),
pytest.param(("SELECT test_table.booleanF as booleanF, test_table1.booleanF as booleanF1 from test_table FULL JOIN test_table1 ON test_table.booleanF=test_table1.booleanF", "test_table.booleanF, test_table1.booleanF FULL JOIN test_table1 ON test_table.booleanF=test_table1.booleanF"), marks=pytest.mark.xfail(condition=with_spark_session(lambda spark : not(spark.sparkContext.version < "3.1.0")), reason='https://github.com/NVIDIA/spark-rapids/issues/578')),
pytest.param(("SELECT test_table.strF as strF, test_table1.strF as strF1 from test_table FULL JOIN test_table1 ON test_table.strF=test_table1.strF", "test_table.strF, test_table1.strF FULL JOIN test_table1 ON test_table.strF=test_table1.strF"), marks=pytest.mark.xfail(condition=with_spark_session(lambda spark : not(spark.sparkContext.version < "3.1.0")), reason='https://github.com/NVIDIA/spark-rapids/issues/578')),
pytest.param(("SELECT test_table.dateF as dateF, test_table1.dateF as dateF1 from test_table FULL JOIN test_table1 ON test_table.dateF=test_table1.dateF", "test_table.dateF, test_table1.dateF FULL JOIN test_table1 ON test_table.dateF=test_table1.dateF"), marks=pytest.mark.xfail(condition=with_spark_session(lambda spark : not(spark.sparkContext.version < "3.1.0")), reason='https://github.com/NVIDIA/spark-rapids/issues/578')),
pytest.param(("SELECT test_table.timestampF as timestampF, test_table1.timestampF as timestampF1 from test_table FULL JOIN test_table1 ON test_table.timestampF=test_table1.timestampF", "test_table.timestampF, test_table1.timestampF FULL JOIN test_table1 ON test_table.timestampF=test_table1.timestampF"), marks=pytest.mark.xfail(condition=with_spark_session(lambda spark : not(spark.sparkContext.version < "3.1.0")), reason='https://github.com/NVIDIA/spark-rapids/issues/578'))
pytest.param(("SELECT test_table.byteF as byteF, test_table1.byteF as byteF1 from test_table FULL JOIN test_table1 ON test_table.byteF=test_table1.byteF", "test_table.byteF, test_table1.byteF FULL JOIN test_table1 ON test_table.byteF=test_table1.byteF"), marks=pytest.mark.xfail(condition=not(is_before_spark_310()), reason='https://github.com/NVIDIA/spark-rapids/issues/578')),
pytest.param(("SELECT test_table.shortF as shortF, test_table1.shortF as shortF1 from test_table FULL JOIN test_table1 ON test_table.shortF=test_table1.shortF", "test_table.shortF, test_table1.shortF FULL JOIN test_table1 ON test_table.shortF=test_table1.shortF"), marks=pytest.mark.xfail(condition=not(is_before_spark_310()), reason='https://github.com/NVIDIA/spark-rapids/issues/578')),
pytest.param(("SELECT test_table.intF as intF, test_table1.intF as intF1 from test_table FULL JOIN test_table1 ON test_table.intF=test_table1.intF", "test_table.intF, test_table1.intF FULL JOIN test_table1 ON test_table.intF=test_table1.intF"), marks=pytest.mark.xfail(condition=not(is_before_spark_310()), reason='https://github.com/NVIDIA/spark-rapids/issues/578')),
pytest.param(("SELECT test_table.longF as longF, test_table1.longF as longF1 from test_table FULL JOIN test_table1 ON test_table.longF=test_table1.longF", "test_table.longF, test_table1.longF FULL JOIN test_table1 ON test_table.longF=test_table1.longF"), marks=pytest.mark.xfail(condition=not(is_before_spark_310()), reason='https://github.com/NVIDIA/spark-rapids/issues/578')),
pytest.param(("SELECT test_table.floatF as floatF, test_table1.floatF as floatF1 from test_table FULL JOIN test_table1 ON test_table.floatF=test_table1.floatF", "test_table.floatF, test_table1.floatF FULL JOIN test_table1 ON test_table.floatF=test_table1.floatF"), marks=pytest.mark.xfail(condition=not(is_before_spark_310()), reason='https://github.com/NVIDIA/spark-rapids/issues/578')),
pytest.param(("SELECT test_table.doubleF as doubleF, test_table1.doubleF as doubleF1 from test_table FULL JOIN test_table1 ON test_table.doubleF=test_table1.doubleF", "test_table.doubleF, test_table1.doubleF FULL JOIN test_table1 ON test_table.doubleF=test_table1.doubleF"), marks=pytest.mark.xfail(condition=not(is_before_spark_310()), reason='https://github.com/NVIDIA/spark-rapids/issues/578')),
pytest.param(("SELECT test_table.booleanF as booleanF, test_table1.booleanF as booleanF1 from test_table FULL JOIN test_table1 ON test_table.booleanF=test_table1.booleanF", "test_table.booleanF, test_table1.booleanF FULL JOIN test_table1 ON test_table.booleanF=test_table1.booleanF"), marks=pytest.mark.xfail(condition=not(is_before_spark_310()), reason='https://github.com/NVIDIA/spark-rapids/issues/578')),
pytest.param(("SELECT test_table.strF as strF, test_table1.strF as strF1 from test_table FULL JOIN test_table1 ON test_table.strF=test_table1.strF", "test_table.strF, test_table1.strF FULL JOIN test_table1 ON test_table.strF=test_table1.strF"), marks=pytest.mark.xfail(condition=not(is_before_spark_310()), reason='https://github.com/NVIDIA/spark-rapids/issues/578')),
pytest.param(("SELECT test_table.dateF as dateF, test_table1.dateF as dateF1 from test_table FULL JOIN test_table1 ON test_table.dateF=test_table1.dateF", "test_table.dateF, test_table1.dateF FULL JOIN test_table1 ON test_table.dateF=test_table1.dateF"), marks=pytest.mark.xfail(condition=not(is_before_spark_310()), reason='https://github.com/NVIDIA/spark-rapids/issues/578')),
pytest.param(("SELECT test_table.timestampF as timestampF, test_table1.timestampF as timestampF1 from test_table FULL JOIN test_table1 ON test_table.timestampF=test_table1.timestampF", "test_table.timestampF, test_table1.timestampF FULL JOIN test_table1 ON test_table.timestampF=test_table1.timestampF"), marks=pytest.mark.xfail(condition=not(is_before_spark_310()), reason='https://github.com/NVIDIA/spark-rapids/issues/578'))
]

SELECT_PRE_ORDER_SQL=[
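The long entries above use pytest.param so that a single (sql, description) tuple can carry its own marks inside the parametrized list. Stripped of the full column lists, the shape is as follows (illustrative values, not a literal list item):

    # Illustrative shape of one FULL JOIN entry; the SQL here is abbreviated.
    import pytest
    from spark_session import is_before_spark_310

    example_case = pytest.param(
        ("SELECT t.intF from test_table t FULL JOIN test_table1 u ON t.intF=u.intF",
         "t.intF FULL JOIN test_table1 ON t.intF=u.intF"),
        marks=pytest.mark.xfail(
            condition=not(is_before_spark_310()),
            reason='https://github.com/NVIDIA/spark-rapids/issues/578'))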
2 changes: 2 additions & 0 deletions integration_tests/src/main/python/spark_init_internal.py
@@ -41,3 +41,5 @@ def get_spark_i_know_what_i_am_doing():
"""
return _spark

+def spark_version():
+    return _spark.version

Review comment (Collaborator):
I like this, but there are a number of other places in the code that are doing a

    with_spark_session(lambda spark : spark.sparkContext.version ...

It would probably be good to update all of them to be consistent, or to do it the same way here too.

Reply (Collaborator, author):
I have reverted the change to be more like the rest, to be consistent.
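For what it's worth, the two styles the thread compares should agree at runtime: reading the version through a session lambda, or through the new accessor. A quick sketch, assuming with_spark_session returns its function's result (which the xfail conditions elsewhere in this PR rely on):

    from spark_session import with_spark_session
    from spark_init_internal import spark_version

    # Both should yield the same version string (e.g. "3.0.1"); the helper
    # just avoids repeating the lambda at every call site.
    inline = with_spark_session(lambda spark: spark.sparkContext.version)
    assert inline == spark_version()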
7 changes: 6 additions & 1 deletion integration_tests/src/main/python/spark_session.py
@@ -14,7 +14,7 @@

from conftest import is_allowing_any_non_gpu, get_non_gpu_allowed
from pyspark.sql import SparkSession, DataFrame
-from spark_init_internal import get_spark_i_know_what_i_am_doing
+from spark_init_internal import get_spark_i_know_what_i_am_doing, spark_version

def _from_scala_map(scala_map):
ret = {}
@@ -90,3 +90,8 @@ def with_gpu_session(func, conf={}):
copy['spark.rapids.sql.test.allowedNonGpu'] = ','.join(get_non_gpu_allowed())
return with_spark_session(func, conf=copy)

+def is_spark_300():
+    return spark_version() == "3.0.0"
+
+def is_before_spark_310():
+    return spark_version() < "3.1.0"
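One caveat worth flagging (an observation, not something this PR needs to address): these helpers compare version strings lexicographically, which is correct for the 3.0.x and 3.1.0 releases in play but would not order a hypothetical two-digit minor version correctly:

    # Plain string comparison, as used by is_before_spark_310().
    assert "3.0.1" < "3.1.0"            # correct ordering
    assert not ("3.2.0" < "3.1.0")      # correct ordering
    assert "3.10.0" < "3.9.0"           # surprise: "1" < "9" compares character-wise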