diff --git a/docs/get-started/getting-started.md b/docs/get-started/getting-started.md index 14f1463a858..bb6d9f36895 100644 --- a/docs/get-started/getting-started.md +++ b/docs/get-started/getting-started.md @@ -425,6 +425,7 @@ simplify these settings in the near future). Choose the version of the shuffle m that matches your Spark version. Currently we support - Spark 3.0.0 (com.nvidia.spark.rapids.spark300.RapidsShuffleManager) - Spark 3.0.1 (com.nvidia.spark.rapids.spark301.RapidsShuffleManager) + - Spark 3.0.2 (com.nvidia.spark.rapids.spark302.RapidsShuffleManager) - Spark 3.1.0 (com.nvidia.spark.rapids.spark310.RapidsShuffleManager) ```shell diff --git a/docs/testing.md b/docs/testing.md index abab1f84406..4bed801188e 100644 --- a/docs/testing.md +++ b/docs/testing.md @@ -42,8 +42,10 @@ They generally follow TPCH but are not guaranteed to be the same. Unit tests exist in the tests directory. This is unconventional and is done so we can run the tests on the final shaded version of the plugin. It also helps with how we collect code coverage. You can run the unit tests against different versions of Spark using the different profiles. The -default version runs again Spark 3.0.0, `-Pspark301tests` runs against Spark 3.0.1, and `-Pspark310tests` -runs unit tests against Spark 3.1.0. 
+default version runs against Spark 3.0.0, to run against other versions, use one of the following profiles: + - `-Pspark301tests` (Spark 3.0.1) + - `-Pspark302tests` (Spark 3.0.2) + - `-Pspark310tests` (Spark 3.1.0) ## Integration tests diff --git a/integration_tests/pom.xml b/integration_tests/pom.xml index 96b197c843d..e6b76445611 100644 --- a/integration_tests/pom.xml +++ b/integration_tests/pom.xml @@ -44,6 +44,12 @@ 3.0.1-SNAPSHOT + + spark302tests + + 3.0.2-SNAPSHOT + + spark310tests diff --git a/jenkins/Jenkinsfile.302.integration b/jenkins/Jenkinsfile.302.integration new file mode 100644 index 00000000000..d5258751369 --- /dev/null +++ b/jenkins/Jenkinsfile.302.integration @@ -0,0 +1,99 @@ +#!/usr/bin/env groovy +/* + * Copyright (c) 2020, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/** +* +* Jenkins file for running spark3.0.2 integration tests +* +*/ + +@Library(['shared-libs', 'spark-jenkins-shared-lib']) _ + +def urmUrl="https://${ArtifactoryConstants.ARTIFACTORY_NAME}/artifactory/sw-spark-maven" + +pipeline { + agent none + + options { + ansiColor('xterm') + timestamps() + timeout(time: 240, unit: 'MINUTES') + buildDiscarder(logRotator(numToKeepStr: '10')) + } + + parameters { + string(name: 'OVERWRITE_PARAMS', defaultValue: '', + description: 'parameters format XXX_VER=xxx;YYY_VER=yyy;') + string(name: 'REF', defaultValue: 'branch-0.2', description: 'Commit to build') + } + + environment { + JENKINS_ROOT = 'jenkins' + TEST_SCRIPT = '$JENKINS_ROOT/spark-tests.sh' + LIBCUDF_KERNEL_CACHE_PATH='/tmp/.cudf' + ARTIFACTORY_NAME = "${ArtifactoryConstants.ARTIFACTORY_NAME}" + URM_URL = "${urmUrl}" + MVN_URM_MIRROR='-s jenkins/settings.xml -P mirror-apache-to-urm' + } + + stages { + stage('IT on 3.0.2-SNAPSHOT') { + agent { label 'docker-gpu' } + environment {SPARK_VER='3.0.2-SNAPSHOT'} + steps { + script { + def CUDA_NAME=sh(returnStdout: true, + script: '. jenkins/version-def.sh>&2 && echo -n $CUDA_CLASSIFIER | sed "s/-/./g"') + def IMAGE_NAME="$ARTIFACTORY_NAME/sw-spark-docker/plugin:it-centos7-$CUDA_NAME" + def CUDA_VER="$CUDA_NAME" - "cuda" + sh "docker pull $IMAGE_NAME" + docker.image(IMAGE_NAME).inside("--runtime=nvidia -v ${HOME}/.zinc:${HOME}/.zinc:rw") { + sh "bash $TEST_SCRIPT" + } + } + } + } + } // end of stages + post { + always { + script { + def status = "failed" + if (currentBuild.currentResult == "SUCCESS") { + status = "success" + slack("#rapidsai-spark-cicd", "Success", color: "#33CC33") + } + else { + slack("#rapidsai-spark-cicd", "Failed", color: "#FF0000") + } + } + echo 'Pipeline finished!' 
+ } + } +} // end of pipeline + +void slack(Map params = [:], String channel, String message) { + Map defaultParams = [ + color: "#000000", + baseUrl: "${SparkConstants.SLACK_API_ENDPOINT}", + tokenCredentialId: "slack_token" + ] + + params["channel"] = channel + params["message"] = "${BUILD_URL}\n" + message + + slackSend(defaultParams << params) +} diff --git a/jenkins/spark-nightly-build.sh b/jenkins/spark-nightly-build.sh index 7e445913f31..469efb79864 100755 --- a/jenkins/spark-nightly-build.sh +++ b/jenkins/spark-nightly-build.sh @@ -22,6 +22,7 @@ set -ex mvn -U -B -Pinclude-databricks clean deploy $MVN_URM_MIRROR -Dmaven.repo.local=$WORKSPACE/.m2 # Run unit tests against other spark versions mvn -U -B -Pspark301tests test $MVN_URM_MIRROR -Dmaven.repo.local=$WORKSPACE/.m2 +mvn -U -B -Pspark302tests test $MVN_URM_MIRROR -Dmaven.repo.local=$WORKSPACE/.m2 mvn -U -B -Pspark310tests test $MVN_URM_MIRROR -Dmaven.repo.local=$WORKSPACE/.m2 # Parse cudf and spark files from local mvn repo diff --git a/pom.xml b/pom.xml index bd2900c9e51..3c0f76825f9 100644 --- a/pom.xml +++ b/pom.xml @@ -169,6 +169,7 @@ 1.7.30 3.0.0 3.0.1-SNAPSHOT + 3.0.2-SNAPSHOT 3.1.0-SNAPSHOT diff --git a/shims/aggregator/pom.xml b/shims/aggregator/pom.xml index af023d99989..75dce894cd4 100644 --- a/shims/aggregator/pom.xml +++ b/shims/aggregator/pom.xml @@ -65,6 +65,12 @@ ${project.version} compile + + com.nvidia + rapids-4-spark-shims-spark302_${scala.binary.version} + ${project.version} + compile + com.nvidia rapids-4-spark-shims-spark301_${scala.binary.version} diff --git a/shims/pom.xml b/shims/pom.xml index 2df7c512232..d5420c8509a 100644 --- a/shims/pom.xml +++ b/shims/pom.xml @@ -44,6 +44,7 @@ spark300 spark301 + spark302 spark310 aggregator diff --git a/shims/spark302/pom.xml b/shims/spark302/pom.xml new file mode 100644 index 00000000000..407c141be30 --- /dev/null +++ b/shims/spark302/pom.xml @@ -0,0 +1,47 @@ + + + + 4.0.0 + + + com.nvidia + rapids-4-spark-shims_2.12 + 0.2.0-SNAPSHOT + 
../pom.xml + + com.nvidia + rapids-4-spark-shims-spark302_2.12 + RAPIDS Accelerator for Apache Spark SQL Plugin Spark 3.0.2 Shim + The RAPIDS SQL plugin for Apache Spark 3.0.2 Shim + 0.2.0-SNAPSHOT + + + + com.nvidia + rapids-4-spark-shims-spark301_${scala.binary.version} + ${project.version} + + + org.apache.spark + spark-sql_${scala.binary.version} + ${spark302.version} + provided + + + diff --git a/shims/spark302/src/main/resources/META-INF/services/com.nvidia.spark.rapids.SparkShimServiceProvider b/shims/spark302/src/main/resources/META-INF/services/com.nvidia.spark.rapids.SparkShimServiceProvider new file mode 100644 index 00000000000..9f61dd0e23e --- /dev/null +++ b/shims/spark302/src/main/resources/META-INF/services/com.nvidia.spark.rapids.SparkShimServiceProvider @@ -0,0 +1 @@ +com.nvidia.spark.rapids.shims.spark302.SparkShimServiceProvider diff --git a/shims/spark302/src/main/scala/com/nvidia/spark/rapids/shims/spark302/Spark302Shims.scala b/shims/spark302/src/main/scala/com/nvidia/spark/rapids/shims/spark302/Spark302Shims.scala new file mode 100644 index 00000000000..1293ba704cc --- /dev/null +++ b/shims/spark302/src/main/scala/com/nvidia/spark/rapids/shims/spark302/Spark302Shims.scala @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2020, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.nvidia.spark.rapids.shims.spark302 + +import com.nvidia.spark.rapids.ShimVersion +import com.nvidia.spark.rapids.shims.spark301.Spark301Shims +import com.nvidia.spark.rapids.spark302.RapidsShuffleManager + +class Spark302Shims extends Spark301Shims { + + override def getSparkShimVersion: ShimVersion = SparkShimServiceProvider.VERSION + + override def getRapidsShuffleManagerClass: String = { + classOf[RapidsShuffleManager].getCanonicalName + } +} diff --git a/shims/spark302/src/main/scala/com/nvidia/spark/rapids/shims/spark302/SparkShimServiceProvider.scala b/shims/spark302/src/main/scala/com/nvidia/spark/rapids/shims/spark302/SparkShimServiceProvider.scala new file mode 100644 index 00000000000..17c921ca381 --- /dev/null +++ b/shims/spark302/src/main/scala/com/nvidia/spark/rapids/shims/spark302/SparkShimServiceProvider.scala @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2020, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.nvidia.spark.rapids.shims.spark302 + +import com.nvidia.spark.rapids.{SparkShims, SparkShimVersion} + +object SparkShimServiceProvider { + val VERSION = SparkShimVersion(3, 0, 2) + val VERSIONNAMES = Seq(s"$VERSION", s"$VERSION-SNAPSHOT") +} +class SparkShimServiceProvider extends com.nvidia.spark.rapids.SparkShimServiceProvider { + + def matchesVersion(version: String): Boolean = { + SparkShimServiceProvider.VERSIONNAMES.contains(version) + } + + def buildShim: SparkShims = { + new Spark302Shims() + } +} diff --git a/shims/spark302/src/main/scala/com/nvidia/spark/rapids/spark302/RapidsShuffleManager.scala b/shims/spark302/src/main/scala/com/nvidia/spark/rapids/spark302/RapidsShuffleManager.scala new file mode 100644 index 00000000000..af5102b00a1 --- /dev/null +++ b/shims/spark302/src/main/scala/com/nvidia/spark/rapids/spark302/RapidsShuffleManager.scala @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2020, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.nvidia.spark.rapids.spark302 + +import org.apache.spark.SparkConf +import org.apache.spark.sql.rapids.shims.spark300.RapidsShuffleInternalManager + +/** A shuffle manager optimized for the RAPIDS Plugin for Apache Spark. 
*/ +sealed class RapidsShuffleManager( + conf: SparkConf, + isDriver: Boolean) extends RapidsShuffleInternalManager(conf, isDriver) { +} diff --git a/tests/pom.xml b/tests/pom.xml index 2037e760991..a59f89c84ed 100644 --- a/tests/pom.xml +++ b/tests/pom.xml @@ -46,6 +46,12 @@ 3.0.1-SNAPSHOT + + spark302tests + + 3.0.2-SNAPSHOT + + spark310tests