diff --git a/aggregator/pom.xml b/aggregator/pom.xml
index 27c13af1e4d..4fa4827ac52 100644
--- a/aggregator/pom.xml
+++ b/aggregator/pom.xml
@@ -619,6 +619,23 @@
             </dependency>
         </dependencies>
     </profile>
+    <profile>
+        <id>release341db</id>
+        <activation>
+            <property>
+                <name>buildver</name>
+                <value>341db</value>
+            </property>
+        </activation>
+        <dependencies>
+            <dependency>
+                <groupId>com.nvidia</groupId>
+                <artifactId>rapids-4-spark-delta-spark341db_${scala.binary.version}</artifactId>
+                <version>${project.version}</version>
+                <classifier>${spark.version.classifier}</classifier>
+            </dependency>
+        </dependencies>
+    </profile>
     <profile>
         <id>release333</id>
diff --git a/integration_tests/src/main/python/delta_lake_merge_test.py b/integration_tests/src/main/python/delta_lake_merge_test.py
index 1d43259434b..0ba63380aba 100644
--- a/integration_tests/src/main/python/delta_lake_merge_test.py
+++ b/integration_tests/src/main/python/delta_lake_merge_test.py
@@ -97,7 +97,7 @@ def checker(data_path, do_merge):
                                   merge_sql=merge_sql,
                                   check_func=checker)

-@allow_non_gpu("ExecutedCommandExec,BroadcastHashJoinExec,ColumnarToRowExec,BroadcastExchangeExec,DataWritingCommandExec", *delta_meta_allow)
+@allow_non_gpu("ExecutedCommandExec,BroadcastHashJoinExec,ColumnarToRowExec,BroadcastExchangeExec,DataWritingCommandExec", delta_write_fallback_allow, *delta_meta_allow)
 @delta_lake
 @ignore_order
 @pytest.mark.skipif(is_databricks_runtime() and spark_version() < "3.3.2", reason="NOT MATCHED BY SOURCE added in DBR 12.2")
diff --git a/integration_tests/src/main/python/fastparquet_compatibility_test.py b/integration_tests/src/main/python/fastparquet_compatibility_test.py
index 6ec5ec88fd3..b51fa5a55ef 100644
--- a/integration_tests/src/main/python/fastparquet_compatibility_test.py
+++ b/integration_tests/src/main/python/fastparquet_compatibility_test.py
@@ -17,7 +17,7 @@
 from asserts import assert_gpu_and_cpu_are_equal_collect
 from data_gen import *
 from fastparquet_utils import get_fastparquet_result_canonicalizer
-from spark_session import spark_version, with_cpu_session, with_gpu_session
+from spark_session import is_databricks_runtime, spark_version, with_cpu_session, with_gpu_session


 def fastparquet_unavailable():
@@ -107,8 +107,12 @@ def read_with_fastparquet_or_plugin(spark):
     pytest.param(IntegerGen(nullable=True),
                  marks=pytest.mark.xfail(reason="Nullables cause merge errors, when converting to Spark dataframe")),
     LongGen(nullable=False),
-    FloatGen(nullable=False),
-    DoubleGen(nullable=False),
+    pytest.param(FloatGen(nullable=False),
+                 marks=pytest.mark.xfail(is_databricks_runtime(),
+                                         reason="https://github.com/NVIDIA/spark-rapids/issues/9778")),
+    pytest.param(DoubleGen(nullable=False),
+                 marks=pytest.mark.xfail(is_databricks_runtime(),
+                                         reason="https://github.com/NVIDIA/spark-rapids/issues/9778")),
     StringGen(nullable=False),
     pytest.param(DecimalGen(nullable=False),
                  marks=pytest.mark.xfail(reason="fastparquet reads Decimal columns as Float, as per "
@@ -131,8 +135,11 @@
                  marks=pytest.mark.xfail(reason="Conversion from Pandas dataframe (read with fastparquet) to Spark dataframe "
                                                 "fails: \"Unable to infer the type of the field a\".")),

-    StructGen(children=[("first", IntegerGen(nullable=False)),
-              ("second", FloatGen(nullable=False))], nullable=False)
+    pytest.param(
+        StructGen(children=[("first", IntegerGen(nullable=False)),
+                            ("second", FloatGen(nullable=False))], nullable=False),
+        marks=pytest.mark.xfail(is_databricks_runtime(),
+                                reason="https://github.com/NVIDIA/spark-rapids/issues/9778")),
 ], ids=idfn)
 def test_reading_file_written_by_spark_cpu(data_gen, spark_tmp_path):
     """
@@ -176,8 +183,12 @@ def test_reading_file_written_by_spark_cpu(data_gen, spark_tmp_path):
     LongGen(nullable=False),
     pytest.param(LongGen(nullable=True),
                  marks=pytest.mark.xfail(reason="Nullables cause merge errors, when converting to Spark dataframe")),
-    FloatGen(nullable=False),
-    DoubleGen(nullable=False),
+    pytest.param(FloatGen(nullable=False),
+                 marks=pytest.mark.xfail(is_databricks_runtime(),
+                                         reason="https://github.com/NVIDIA/spark-rapids/issues/9778")),
+    pytest.param(DoubleGen(nullable=False),
+                 marks=pytest.mark.xfail(is_databricks_runtime(),
+                                         reason="https://github.com/NVIDIA/spark-rapids/issues/9778")),
     StringGen(nullable=False),
     pytest.param(DecimalGen(nullable=False),
                  marks=pytest.mark.xfail(reason="fastparquet reads Decimal columns as Float, as per "
diff --git a/integration_tests/src/main/python/udf_cudf_test.py b/integration_tests/src/main/python/udf_cudf_test.py
index 04416315702..6d94a5da206 100644
--- a/integration_tests/src/main/python/udf_cudf_test.py
+++ b/integration_tests/src/main/python/udf_cudf_test.py
@@ -37,10 +37,15 @@
 from typing import Iterator
 from pyspark.sql import Window
 from pyspark.sql.functions import pandas_udf, PandasUDFType
-from spark_session import with_cpu_session, with_gpu_session
+from spark_session import is_databricks_runtime, is_spark_340_or_later, with_cpu_session, with_gpu_session
 from marks import cudf_udf

+if is_databricks_runtime() and is_spark_340_or_later():
+    # Databricks 13.3 does not use separate reader/writer threads for Python UDFs
+    # which can lead to hangs. Skipping these tests until the Python UDF handling is updated.
+    pytestmark = pytest.mark.skip(reason="https://github.com/NVIDIA/spark-rapids/issues/9493")
+
 _conf = {
     'spark.rapids.sql.exec.AggregateInPandasExec': 'true',
     'spark.rapids.sql.exec.FlatMapCoGroupsInPandasExec': 'true',
diff --git a/integration_tests/src/main/python/udf_test.py b/integration_tests/src/main/python/udf_test.py
index 14fc57cf972..db8425f6387 100644
--- a/integration_tests/src/main/python/udf_test.py
+++ b/integration_tests/src/main/python/udf_test.py
@@ -15,7 +15,7 @@
 import pytest

 from conftest import is_at_least_precommit_run
-from spark_session import is_databricks_runtime, is_before_spark_330, is_before_spark_350, is_spark_350_or_later
+from spark_session import is_databricks_runtime, is_before_spark_330, is_before_spark_350, is_spark_340_or_later
 from pyspark.sql.pandas.utils import require_minimum_pyarrow_version, require_minimum_pandas_version


@@ -43,6 +43,12 @@
 import pyarrow
 from typing import Iterator, Tuple

+
+if is_databricks_runtime() and is_spark_340_or_later():
+    # Databricks 13.3 does not use separate reader/writer threads for Python UDFs
+    # which can lead to hangs. Skipping these tests until the Python UDF handling is updated.
+    pytestmark = pytest.mark.skip(reason="https://github.com/NVIDIA/spark-rapids/issues/9493")
+
 arrow_udf_conf = {
     'spark.sql.execution.arrow.pyspark.enabled': 'true',
     'spark.rapids.sql.exec.WindowInPandasExec': 'true',
diff --git a/jenkins/Jenkinsfile-blossom.premerge-databricks b/jenkins/Jenkinsfile-blossom.premerge-databricks
index 0ea835d39a9..27c42f59aab 100644
--- a/jenkins/Jenkinsfile-blossom.premerge-databricks
+++ b/jenkins/Jenkinsfile-blossom.premerge-databricks
@@ -88,7 +88,7 @@ pipeline {
                 // 'name' and 'value' only supprt literal string in the declarative Jenkins
                 // Refer to Jenkins issue https://issues.jenkins.io/browse/JENKINS-62127
                 name 'DB_RUNTIME'
-                values '10.4', '11.3', '12.2'
+                values '10.4', '11.3', '12.2', '13.3'
             }
         }
         stages {
diff --git a/pom.xml b/pom.xml
index d099315ef8c..7e6ed88cf9f 100644
--- a/pom.xml
+++ b/pom.xml
@@ -509,6 +509,31 @@
                 <module>delta-lake/delta-spark332db</module>
             </modules>
         </profile>
+        <profile>
+            <id>release341db</id>
+            <activation>
+                <property>
+                    <name>buildver</name>
+                    <value>341db</value>
+                </property>
+            </activation>
+            <properties>
+                <!-- Downgrade scala plugin version due to: https://github.com/sbt/sbt/issues/4305 -->
+                <scala.plugin.version>3.4.4</scala.plugin.version>
+                <spark.version.classifier>spark341db</spark.version.classifier>
+                <spark.version>${spark341db.version}</spark.version>
+                <spark.test.version>${spark341db.version}</spark.test.version>
+                <hadoop.client.version>3.3.1</hadoop.client.version>
+                <rat.skip>true</rat.skip>
+                <parquet.hadoop.version>1.12.0</parquet.hadoop.version>
+                <iceberg.version>${spark330.iceberg.version}</iceberg.version>
+            </properties>
+            <modules>
+                <module>shim-deps/databricks</module>
+                <module>delta-lake/delta-spark341db</module>
+            </modules>
+        </profile>
         <profile>
             <id>release350</id>
@@ -691,6 +716,7 @@
         <spark332cdh.version>3.3.2.3.3.7190.0-91</spark332cdh.version>
         <spark330db.version>3.3.0-databricks</spark330db.version>
         <spark332db.version>3.3.2-databricks</spark332db.version>
+        <spark341db.version>3.4.1-databricks</spark341db.version>
         <spark350.version>3.5.0</spark350.version>
         <mockito.version>3.12.4</mockito.version>
         <scala.plugin.version>4.3.0</scala.plugin.version>
@@ -745,7 +771,8 @@
         <databricks.buildvers>
             321db,
             330db,
-            332db
+            332db,
+            341db
         </databricks.buildvers>
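
Note on the test-skipping pattern used in the udf_test.py and udf_cudf_test.py hunks above: they rely on pytest's module-level pytestmark attribute, which applies a marker to every test collected from the module. Below is a minimal standalone sketch of that pattern, assuming nothing beyond pytest itself; the module and helper names are illustrative and are not part of this patch.

# test_module_skip_example.py -- illustrative sketch of a module-level conditional skip
import pytest

def _runtime_needs_skip():
    # Stand-in for the is_databricks_runtime() and is_spark_340_or_later() checks; always False here.
    return False

if _runtime_needs_skip():
    # Assigning pytestmark at module scope skips every test in this module,
    # mirroring the guard added to udf_test.py and udf_cudf_test.py.
    pytestmark = pytest.mark.skip(reason="skipped on this runtime")

def test_addition():
    assert 1 + 1 == 2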