diff --git a/integration_tests/src/main/python/orc_test.py b/integration_tests/src/main/python/orc_test.py index 789a261b52b..618004ee60d 100644 --- a/integration_tests/src/main/python/orc_test.py +++ b/integration_tests/src/main/python/orc_test.py @@ -845,12 +845,16 @@ def test_simple_partitioned_read_for_multithreaded_combining(spark_tmp_path, kee assert_gpu_and_cpu_are_equal_collect( lambda spark: spark.read.orc(data_path), conf=all_confs) -@pytest.mark.skipif(is_spark_340_or_later() and (not (is_databricks_runtime() and spark_version() == "3.4.1")), reason="https://github.com/NVIDIA/spark-rapids/issues/8324") + +@pytest.mark.skipif(is_spark_340_or_later() and not is_databricks_runtime(), + reason="https://github.com/NVIDIA/spark-rapids/issues/8324") @pytest.mark.parametrize('data_file', ['fixed-length-char-column-from-hive.orc']) @pytest.mark.parametrize('reader', [read_orc_df, read_orc_sql]) def test_read_hive_fixed_length_char(std_input_path, data_file, reader): """ Test that a file containing CHAR data is readable as STRING. + The plugin behaviour matches all Spark versions prior to 3.4.0, + and Databricks version 13.3 (i.e. 3.4.1) and after. """ assert_gpu_and_cpu_are_equal_collect( reader(std_input_path + '/' + data_file), @@ -858,20 +862,30 @@ def test_read_hive_fixed_length_char(std_input_path, data_file, reader): @allow_non_gpu("ProjectExec") -@pytest.mark.skipif(is_before_spark_340() or (is_databricks_runtime() and spark_version() == "3.4.1"), reason="https://github.com/NVIDIA/spark-rapids/issues/8324") +@pytest.mark.skipif(is_before_spark_340(), + reason="https://github.com/NVIDIA/spark-rapids/issues/8324") +@pytest.mark.skipif(is_databricks_version_or_later(13, 3), + reason="The SELECT * query does not involve ProjectExec " + "on Databricks versions >= 13.3. " + "Can't test Project fallback without ProjectExec.") @pytest.mark.parametrize('data_file', ['fixed-length-char-column-from-hive.orc']) @pytest.mark.parametrize('reader', [read_orc_df, read_orc_sql]) def test_project_fallback_when_reading_hive_fixed_length_char(std_input_path, data_file, reader): """ - Test that a file containing CHAR data is readable as STRING. + Test that reading a file containing fixed-width CHAR data (e.g. CHAR(3)) as a STRING column + causes the ProjectExec to fall back to CPU. Note: This test can be removed when https://github.com/NVIDIA/spark-rapids/issues/8324 is resolved. + + This test does not apply to Databricks >= 13.3, because there would be + no ProjectExec to fall back to CPU. """ assert_gpu_fallback_collect( reader(std_input_path + '/' + data_file), cpu_fallback_class_name="ProjectExec", conf={}) + @pytest.mark.parametrize('read_func', [read_orc_df, read_orc_sql]) @pytest.mark.parametrize('v1_enabled_list', ["", "orc"]) @pytest.mark.parametrize('orc_impl', ["native", "hive"])