Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Temporarily disable timestamp read tests for Parquet and ORC #3758

Merged
merged 1 commit into from
Oct 7, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions integration_tests/src/main/python/orc_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ def read_orc_sql(data_path):
coalescing_orc_file_reader_conf = {'spark.rapids.sql.format.orc.reader.type': 'COALESCING'}
reader_opt_confs = [original_orc_file_reader_conf, multithreaded_orc_file_reader_conf, coalescing_orc_file_reader_conf]

@pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/3742')
@pytest.mark.parametrize('name', ['timestamp-date-test.orc'])
@pytest.mark.parametrize('read_func', [read_orc_df, read_orc_sql])
@pytest.mark.parametrize('v1_enabled_list', ["", "orc"])
Expand Down Expand Up @@ -120,6 +121,7 @@ def test_orc_fallback(spark_tmp_path, read_func, disable_conf):
conf={disable_conf: 'false',
"spark.sql.sources.useV1SourceList": "orc"})

@pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/3742')
@pytest.mark.order(2)
@pytest.mark.parametrize('orc_gens', orc_gens_list, ids=idfn)
@pytest.mark.parametrize('read_func', [read_orc_df, read_orc_sql])
Expand All @@ -145,6 +147,7 @@ def test_read_round_trip(spark_tmp_path, orc_gens, read_func, reader_confs, v1_e
# timestamp_gen
TimestampGen(start=datetime(1970, 1, 1, tzinfo=timezone.utc))]

@pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/3742')
@pytest.mark.order(2)
@pytest.mark.parametrize('orc_gen', orc_pred_push_gens, ids=idfn)
@pytest.mark.parametrize('read_func', [read_orc_df, read_orc_sql])
Expand Down Expand Up @@ -200,6 +203,7 @@ def test_compress_read_round_trip(spark_tmp_path, compress, v1_enabled_list, rea
lambda spark : spark.read.orc(data_path),
conf=all_confs)

@pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/3742')
@pytest.mark.parametrize('v1_enabled_list', ["", "orc"])
@pytest.mark.parametrize('reader_confs', reader_opt_confs, ids=idfn)
def test_simple_partitioned_read(spark_tmp_path, v1_enabled_list, reader_confs):
Expand All @@ -225,6 +229,7 @@ def test_simple_partitioned_read(spark_tmp_path, v1_enabled_list, reader_confs):
conf=all_confs)

# This test reads the data, but selects only the key that the data was partitioned by
@pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/3742')
@pytest.mark.parametrize('v1_enabled_list', ["", "orc"])
@pytest.mark.parametrize('reader_confs', reader_opt_confs, ids=idfn)
def test_partitioned_read_just_partitions(spark_tmp_path, v1_enabled_list, reader_confs):
Expand Down Expand Up @@ -378,6 +383,7 @@ def test_missing_column_names_filter(spark_tmp_table_factory, reader_confs):
reader_confs)


@pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/3742')
@pytest.mark.parametrize('data_gen,read_schema', _nested_pruning_schemas, ids=idfn)
@pytest.mark.parametrize('reader_confs', reader_opt_confs, ids=idfn)
@pytest.mark.parametrize('v1_enabled_list', ["", "orc"])
Expand Down Expand Up @@ -410,6 +416,7 @@ def test_read_struct_without_stream(spark_tmp_path):
lambda spark : spark.read.orc(data_path))


@pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/3742')
@pytest.mark.parametrize('orc_gen', flattened_orc_gens, ids=idfn)
@pytest.mark.parametrize('reader_confs', reader_opt_confs, ids=idfn)
@pytest.mark.parametrize('v1_enabled_list', ["", "orc"])
Expand Down
9 changes: 9 additions & 0 deletions integration_tests/src/main/python/parquet_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ def read_parquet_sql(data_path):
reader_opt_confs = [original_parquet_file_reader_conf, multithreaded_parquet_file_reader_conf,
coalesce_parquet_file_reader_conf]

@pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/3742')
@pytest.mark.parametrize('parquet_gens', parquet_gens_list, ids=idfn)
@pytest.mark.parametrize('read_func', [read_parquet_df, read_parquet_sql])
@pytest.mark.parametrize('reader_confs', reader_opt_confs)
Expand Down Expand Up @@ -133,6 +134,7 @@ def test_compress_read_round_trip(spark_tmp_path, compress, v1_enabled_list, rea
# timestamp_gen
TimestampGen(start=datetime(1900, 1, 1, tzinfo=timezone.utc))] + decimal_gens

@pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/3742')
@pytest.mark.parametrize('parquet_gen', parquet_pred_push_gens, ids=idfn)
@pytest.mark.parametrize('read_func', [read_parquet_df, read_parquet_sql])
@pytest.mark.parametrize('reader_confs', reader_opt_confs)
Expand Down Expand Up @@ -174,6 +176,7 @@ def test_ts_read_round_trip_nested(gen, spark_tmp_path, ts_write, ts_rebase, v1_
lambda spark : spark.read.parquet(data_path),
conf=all_confs)

@pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/3742')
# Once https://github.com/NVIDIA/spark-rapids/issues/132 is fixed, replace this with
# timestamp_gen
@pytest.mark.parametrize('gen', [TimestampGen(start=datetime(1900, 1, 1, tzinfo=timezone.utc))], ids=idfn)
Expand All @@ -198,6 +201,7 @@ def readParquetCatchException(spark, data_path):
df = spark.read.parquet(data_path).collect()
assert e_info.match(r".*SparkUpgradeException.*")

@pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/3742')
# Once https://github.com/NVIDIA/spark-rapids/issues/1126 is fixed, nested timestamps and dates should be added in.
# Once https://github.com/NVIDIA/spark-rapids/issues/132 is fixed, replace this with
# timestamp_gen
Expand Down Expand Up @@ -240,6 +244,7 @@ def test_decimal_read_legacy(spark_tmp_path, parquet_gens, read_func, reader_con
pytest.param([timestamp_gen], marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/133')),
pytest.param([date_gen], marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/133'))]

@pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/3742')
@pytest.mark.parametrize('parquet_gens', parquet_gens_legacy_list, ids=idfn)
@pytest.mark.parametrize('reader_confs', reader_opt_confs)
@pytest.mark.parametrize('v1_enabled_list', ["", "parquet"])
Expand All @@ -254,6 +259,7 @@ def test_read_round_trip_legacy(spark_tmp_path, parquet_gens, v1_enabled_list, r
lambda spark : spark.read.parquet(data_path),
conf=all_confs)

@pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/3742')
@pytest.mark.parametrize('reader_confs', reader_opt_confs)
@pytest.mark.parametrize('v1_enabled_list', ["", "parquet"])
def test_simple_partitioned_read(spark_tmp_path, v1_enabled_list, reader_confs):
Expand Down Expand Up @@ -326,6 +332,7 @@ def test_read_schema_missing_cols(spark_tmp_path, v1_enabled_list, reader_confs)
lambda spark : spark.read.parquet(data_path),
conf=all_confs)

@pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/3742')
@pytest.mark.parametrize('reader_confs', reader_opt_confs)
@pytest.mark.parametrize('v1_enabled_list', ["", "parquet"])
def test_read_merge_schema(spark_tmp_path, v1_enabled_list, reader_confs):
Expand All @@ -350,6 +357,7 @@ def test_read_merge_schema(spark_tmp_path, v1_enabled_list, reader_confs):
lambda spark : spark.read.option('mergeSchema', 'true').parquet(data_path),
conf=all_confs)

@pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/3742')
@pytest.mark.parametrize('reader_confs', reader_opt_confs)
@pytest.mark.parametrize('v1_enabled_list', ["", "parquet"])
def test_read_merge_schema_from_conf(spark_tmp_path, v1_enabled_list, reader_confs):
Expand Down Expand Up @@ -480,6 +488,7 @@ def test_small_file_memory(spark_tmp_path, v1_enabled_list):
[["ar", ArrayGen(StructGen([["str_2", StringGen()]]))]])
]

@pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/3742')
@pytest.mark.parametrize('data_gen,read_schema', _nested_pruning_schemas, ids=idfn)
@pytest.mark.parametrize('reader_confs', reader_opt_confs)
@pytest.mark.parametrize('v1_enabled_list', ["", "parquet"])
Expand Down