From 3a77447742113cbb80b3b309bedbe59fc9a4302a Mon Sep 17 00:00:00 2001
From: Thomas Graves
Date: Thu, 20 Aug 2020 11:22:01 -0500
Subject: [PATCH 1/7] Add Spark 3.0.2 to Shim layer

Signed-off-by: Thomas Graves
---
 pom.xml                                       |  1 +
 shims/aggregator/pom.xml                      |  6 ++++
 shims/pom.xml                                 |  1 +
 ...idia.spark.rapids.SparkShimServiceProvider |  1 +
 .../rapids/shims/spark302/Spark302Shims.scala | 30 ++++++++++++++++
 .../spark302/SparkShimServiceProvider.scala   | 34 +++++++++++++++++++
 .../spark302/RapidsShuffleManager.scala       | 26 ++++++++++++++
 tests/pom.xml                                 |  6 ++++
 8 files changed, 105 insertions(+)
 create mode 100644 shims/spark302/src/main/resources/META-INF/services/com.nvidia.spark.rapids.SparkShimServiceProvider
 create mode 100644 shims/spark302/src/main/scala/com/nvidia/spark/rapids/shims/spark302/Spark302Shims.scala
 create mode 100644 shims/spark302/src/main/scala/com/nvidia/spark/rapids/shims/spark302/SparkShimServiceProvider.scala
 create mode 100644 shims/spark302/src/main/scala/com/nvidia/spark/rapids/spark302/RapidsShuffleManager.scala

diff --git a/pom.xml b/pom.xml
index bd2900c9e51..3c0f76825f9 100644
--- a/pom.xml
+++ b/pom.xml
@@ -169,6 +169,7 @@
         <slf4j.version>1.7.30</slf4j.version>
         <spark300.version>3.0.0</spark300.version>
         <spark301.version>3.0.1-SNAPSHOT</spark301.version>
+        <spark302.version>3.0.2-SNAPSHOT</spark302.version>
         <spark310.version>3.1.0-SNAPSHOT</spark310.version>
diff --git a/shims/aggregator/pom.xml b/shims/aggregator/pom.xml
index af023d99989..75dce894cd4 100644
--- a/shims/aggregator/pom.xml
+++ b/shims/aggregator/pom.xml
@@ -65,6 +65,12 @@
             <version>${project.version}</version>
             <scope>compile</scope>
         </dependency>
+        <dependency>
+            <groupId>com.nvidia</groupId>
+            <artifactId>rapids-4-spark-shims-spark302_${scala.binary.version}</artifactId>
+            <version>${project.version}</version>
+            <scope>compile</scope>
+        </dependency>
         <dependency>
             <groupId>com.nvidia</groupId>
             <artifactId>rapids-4-spark-shims-spark301_${scala.binary.version}</artifactId>
diff --git a/shims/pom.xml b/shims/pom.xml
index 2df7c512232..d5420c8509a 100644
--- a/shims/pom.xml
+++ b/shims/pom.xml
@@ -44,6 +44,7 @@
     <modules>
         <module>spark300</module>
        <module>spark301</module>
+        <module>spark302</module>
        <module>spark310</module>
        <module>aggregator</module>
    </modules>
diff --git a/shims/spark302/src/main/resources/META-INF/services/com.nvidia.spark.rapids.SparkShimServiceProvider b/shims/spark302/src/main/resources/META-INF/services/com.nvidia.spark.rapids.SparkShimServiceProvider
new file mode 100644
index 00000000000..9f61dd0e23e
--- /dev/null
+++ b/shims/spark302/src/main/resources/META-INF/services/com.nvidia.spark.rapids.SparkShimServiceProvider
@@ -0,0 +1 @@
+com.nvidia.spark.rapids.shims.spark302.SparkShimServiceProvider
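
The one-line service file above is what makes the new shim discoverable: it registers the 3.0.2 provider under the `com.nvidia.spark.rapids.SparkShimServiceProvider` interface so the JVM's `ServiceLoader` can find it at runtime. A minimal sketch of that discovery mechanism, assuming only the provider API shown in the sources below (illustrative; the plugin's actual loader code is not part of this patch):

```scala
import java.util.ServiceLoader

import scala.collection.JavaConverters._

import com.nvidia.spark.rapids.{SparkShims, SparkShimServiceProvider}

// Illustrative sketch: scan all providers registered via META-INF/services
// and pick the one that claims the running Spark version.
def loadShims(sparkVersion: String): SparkShims = {
  val providers = ServiceLoader.load(classOf[SparkShimServiceProvider]).asScala
  providers.find(_.matchesVersion(sparkVersion))
    .map(_.buildShim)
    .getOrElse(throw new IllegalArgumentException(
      s"No shim provider claims Spark version $sparkVersion"))
}
```
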
diff --git a/shims/spark302/src/main/scala/com/nvidia/spark/rapids/shims/spark302/Spark302Shims.scala b/shims/spark302/src/main/scala/com/nvidia/spark/rapids/shims/spark302/Spark302Shims.scala
new file mode 100644
index 00000000000..1293ba704cc
--- /dev/null
+++ b/shims/spark302/src/main/scala/com/nvidia/spark/rapids/shims/spark302/Spark302Shims.scala
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2020, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.nvidia.spark.rapids.shims.spark302
+
+import com.nvidia.spark.rapids.ShimVersion
+import com.nvidia.spark.rapids.shims.spark301.Spark301Shims
+import com.nvidia.spark.rapids.spark302.RapidsShuffleManager
+
+class Spark302Shims extends Spark301Shims {
+
+  override def getSparkShimVersion: ShimVersion = SparkShimServiceProvider.VERSION
+
+  override def getRapidsShuffleManagerClass: String = {
+    classOf[RapidsShuffleManager].getCanonicalName
+  }
+}
diff --git a/shims/spark302/src/main/scala/com/nvidia/spark/rapids/shims/spark302/SparkShimServiceProvider.scala b/shims/spark302/src/main/scala/com/nvidia/spark/rapids/shims/spark302/SparkShimServiceProvider.scala
new file mode 100644
index 00000000000..17c921ca381
--- /dev/null
+++ b/shims/spark302/src/main/scala/com/nvidia/spark/rapids/shims/spark302/SparkShimServiceProvider.scala
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2020, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.nvidia.spark.rapids.shims.spark302
+
+import com.nvidia.spark.rapids.{SparkShims, SparkShimVersion}
+
+object SparkShimServiceProvider {
+  val VERSION = SparkShimVersion(3, 0, 2)
+  val VERSIONNAMES = Seq(s"$VERSION", s"$VERSION-SNAPSHOT")
+}
+class SparkShimServiceProvider extends com.nvidia.spark.rapids.SparkShimServiceProvider {
+
+  def matchesVersion(version: String): Boolean = {
+    SparkShimServiceProvider.VERSIONNAMES.contains(version)
+  }
+
+  def buildShim: SparkShims = {
+    new Spark302Shims()
+  }
+}
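
To make the matching contract concrete: `VERSIONNAMES` interpolates `VERSION` into both a release and a snapshot string, which implies `SparkShimVersion(3, 0, 2)` renders as `3.0.2`. A hypothetical REPL session against the provider above (the expected results follow directly from `VERSIONNAMES`):

```scala
import com.nvidia.spark.rapids.shims.spark302.SparkShimServiceProvider

val provider = new SparkShimServiceProvider()
provider.matchesVersion("3.0.2")          // true  -- release build
provider.matchesVersion("3.0.2-SNAPSHOT") // true  -- snapshot build
provider.matchesVersion("3.0.1")          // false -- claimed by the spark301 shim instead
```
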
diff --git a/shims/spark302/src/main/scala/com/nvidia/spark/rapids/spark302/RapidsShuffleManager.scala b/shims/spark302/src/main/scala/com/nvidia/spark/rapids/spark302/RapidsShuffleManager.scala
new file mode 100644
index 00000000000..af5102b00a1
--- /dev/null
+++ b/shims/spark302/src/main/scala/com/nvidia/spark/rapids/spark302/RapidsShuffleManager.scala
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2020, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.nvidia.spark.rapids.spark302
+
+import org.apache.spark.SparkConf
+import org.apache.spark.sql.rapids.shims.spark300.RapidsShuffleInternalManager
+
+/** A shuffle manager optimized for the RAPIDS Plugin for Apache Spark. */
+sealed class RapidsShuffleManager(
+    conf: SparkConf,
+    isDriver: Boolean) extends RapidsShuffleInternalManager(conf, isDriver) {
+}
diff --git a/tests/pom.xml b/tests/pom.xml
index 2037e760991..a59f89c84ed 100644
--- a/tests/pom.xml
+++ b/tests/pom.xml
@@ -46,6 +46,12 @@
                 <spark.test.version>3.0.1-SNAPSHOT</spark.test.version>
            </properties>
        </profile>
+        <profile>
+            <id>spark302tests</id>
+            <properties>
+                <spark.test.version>3.0.2-SNAPSHOT</spark.test.version>
+            </properties>
+        </profile>
        <profile>
            <id>spark310tests</id>

From 7169317480770e9a09036b76200f5dfdb13fc57f Mon Sep 17 00:00:00 2001
From: Thomas Graves
Date: Thu, 20 Aug 2020 11:24:21 -0500
Subject: [PATCH 2/7] Update docs for Spark 3.0.2

Signed-off-by: Thomas Graves
---
 docs/get-started/getting-started.md | 1 +
 docs/testing.md                     | 4 ++--
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/docs/get-started/getting-started.md b/docs/get-started/getting-started.md
index 14f1463a858..bb6d9f36895 100644
--- a/docs/get-started/getting-started.md
+++ b/docs/get-started/getting-started.md
@@ -425,6 +425,7 @@ simplify these settings in the near future). Choose the version of the shuffle manager
 that matches your Spark version. Currently we support
 - Spark 3.0.0 (com.nvidia.spark.rapids.spark300.RapidsShuffleManager)
 - Spark 3.0.1 (com.nvidia.spark.rapids.spark301.RapidsShuffleManager)
+- Spark 3.0.2 (com.nvidia.spark.rapids.spark302.RapidsShuffleManager)
 - Spark 3.1.0 (com.nvidia.spark.rapids.spark310.RapidsShuffleManager)
 
 ```shell
diff --git a/docs/testing.md b/docs/testing.md
index abab1f84406..8e63736cb33 100644
--- a/docs/testing.md
+++ b/docs/testing.md
@@ -42,8 +42,8 @@ They generally follow TPCH but are not guaranteed to be the same.
 Unit tests exist in the tests directory. This is unconventional and is done so we can run the tests
 on the final shaded version of the plugin. It also helps with how we collect code coverage. You can
 run the unit tests against different versions of Spark using the different profiles. The
-default version runs against Spark 3.0.0, `-Pspark301tests` runs against Spark 3.0.1, and `-Pspark310tests`
-runs unit tests against Spark 3.1.0.
+default version runs against Spark 3.0.0, `-Pspark301tests` runs against Spark 3.0.1, `-Pspark302tests`
+runs against Spark 3.0.2, and `-Pspark310tests` runs unit tests against Spark 3.1.0.
 
 ## Integration tests
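
For readers following the getting-started change: the class name added there is the value users hand to Spark's standard `spark.shuffle.manager` setting. A minimal Scala sketch, assuming a Spark 3.0.2 runtime (in practice these settings are passed on the `spark-submit` command line, as the shell snippet in that doc shows, because the shuffle manager must be configured before the SparkContext starts):

```scala
import org.apache.spark.SparkConf
import org.apache.spark.sql.SparkSession

// Sketch only: pick the RapidsShuffleManager whose package matches the
// Spark version in use -- spark302 on a Spark 3.0.2 cluster.
val conf = new SparkConf()
  .set("spark.plugins", "com.nvidia.spark.SQLPlugin")
  .set("spark.shuffle.manager",
    "com.nvidia.spark.rapids.spark302.RapidsShuffleManager")

val spark = SparkSession.builder().config(conf).getOrCreate()
```
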
From efaa66c6381353aef3e930883afb192c3c525c54 Mon Sep 17 00:00:00 2001
From: Thomas Graves
Date: Thu, 20 Aug 2020 14:13:25 -0500
Subject: [PATCH 3/7] add missing pom

Signed-off-by: Thomas Graves
---
 shims/spark302/pom.xml | 47 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 47 insertions(+)
 create mode 100644 shims/spark302/pom.xml

diff --git a/shims/spark302/pom.xml b/shims/spark302/pom.xml
new file mode 100644
index 00000000000..407c141be30
--- /dev/null
+++ b/shims/spark302/pom.xml
@@ -0,0 +1,47 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Copyright (c) 2020, NVIDIA CORPORATION.
+
+  Licensed under the Apache License, Version 2.0 (the "License");
+  you may not use this file except in compliance with the License.
+  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+
+    <parent>
+        <groupId>com.nvidia</groupId>
+        <artifactId>rapids-4-spark-shims_2.12</artifactId>
+        <version>0.2.0-SNAPSHOT</version>
+        <relativePath>../pom.xml</relativePath>
+    </parent>
+    <groupId>com.nvidia</groupId>
+    <artifactId>rapids-4-spark-shims-spark302_2.12</artifactId>
+    <name>RAPIDS Accelerator for Apache Spark SQL Plugin Spark 3.0.2 Shim</name>
+    <description>The RAPIDS SQL plugin for Apache Spark 3.0.2 Shim</description>
+    <version>0.2.0-SNAPSHOT</version>
+
+    <dependencies>
+        <dependency>
+            <groupId>com.nvidia</groupId>
+            <artifactId>rapids-4-spark-shims-spark301_${scala.binary.version}</artifactId>
+            <version>${project.version}</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.spark</groupId>
+            <artifactId>spark-sql_${scala.binary.version}</artifactId>
+            <version>${spark302.version}</version>
+            <scope>provided</scope>
+        </dependency>
+    </dependencies>
+</project>

From 1d0d46f4f955ad9417f2e7d3e665e465aa1a58d1 Mon Sep 17 00:00:00 2001
From: Thomas Graves
Date: Thu, 20 Aug 2020 14:40:27 -0500
Subject: [PATCH 4/7] Add spark 3.0.2 unit tests to the nightly

Signed-off-by: Thomas Graves
---
 jenkins/spark-nightly-build.sh | 1 +
 1 file changed, 1 insertion(+)

diff --git a/jenkins/spark-nightly-build.sh b/jenkins/spark-nightly-build.sh
index 7e445913f31..469efb79864 100755
--- a/jenkins/spark-nightly-build.sh
+++ b/jenkins/spark-nightly-build.sh
@@ -22,6 +22,7 @@ set -ex
 mvn -U -B -Pinclude-databricks clean deploy $MVN_URM_MIRROR -Dmaven.repo.local=$WORKSPACE/.m2
 # Run unit tests against other spark versions
 mvn -U -B -Pspark301tests test $MVN_URM_MIRROR -Dmaven.repo.local=$WORKSPACE/.m2
+mvn -U -B -Pspark302tests test $MVN_URM_MIRROR -Dmaven.repo.local=$WORKSPACE/.m2
 mvn -U -B -Pspark310tests test $MVN_URM_MIRROR -Dmaven.repo.local=$WORKSPACE/.m2
 
 # Parse cudf and spark files from local mvn repo

From 6108a56a01a21efb88109076f35e9c367ff0a2c1 Mon Sep 17 00:00:00 2001
From: Thomas Graves
Date: Thu, 20 Aug 2020 14:41:49 -0500
Subject: [PATCH 5/7] Add 3.0.2 to the integration tests

---
 integration_tests/pom.xml | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/integration_tests/pom.xml b/integration_tests/pom.xml
index 96b197c843d..e6b76445611 100644
--- a/integration_tests/pom.xml
+++ b/integration_tests/pom.xml
@@ -44,6 +44,12 @@
                 <spark.test.version>3.0.1-SNAPSHOT</spark.test.version>
            </properties>
        </profile>
+        <profile>
+            <id>spark302tests</id>
+            <properties>
+                <spark.test.version>3.0.2-SNAPSHOT</spark.test.version>
+            </properties>
+        </profile>
        <profile>
            <id>spark310tests</id>
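
Patches 3 through 5 wire 3.0.2 into the build: the shim pom compiles against the Spark 3.0.2 artifacts, and the `spark302tests` profiles (mirroring the one added to tests/pom.xml in patch 1) swap the Spark version the existing suites compile and run against. A hypothetical smoke check illustrating what the profile buys (the object name is illustrative, not from the repo):

```scala
import org.apache.spark.SPARK_VERSION

import com.nvidia.spark.rapids.shims.spark302.SparkShimServiceProvider

// Hypothetical check: with -Pspark302tests putting Spark 3.0.2 on the
// classpath, the 3.0.2 provider should claim the running version.
object ShimSelectionSmokeCheck {
  def main(args: Array[String]): Unit = {
    val claimed = new SparkShimServiceProvider().matchesVersion(SPARK_VERSION)
    println(s"Spark $SPARK_VERSION claimed by the spark302 shim: $claimed")
  }
}
```
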
From 2a9fd2cbca20ad6260f1844239c34d6a77cd12a2 Mon Sep 17 00:00:00 2001
From: Thomas Graves
Date: Thu, 20 Aug 2020 14:52:55 -0500
Subject: [PATCH 6/7] Add jenkins file for 3.0.2 integration tests

---
 jenkins/Jenkinsfile.302.integration | 99 +++++++++++++++++++++++++++++
 1 file changed, 99 insertions(+)
 create mode 100644 jenkins/Jenkinsfile.302.integration

diff --git a/jenkins/Jenkinsfile.302.integration b/jenkins/Jenkinsfile.302.integration
new file mode 100644
index 00000000000..d5258751369
--- /dev/null
+++ b/jenkins/Jenkinsfile.302.integration
@@ -0,0 +1,99 @@
+#!/usr/local/env groovy
+/*
+ * Copyright (c) 2020, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+*
+* Jenkins file for running spark3.0.2 integration tests
+*
+*/
+
+@Library(['shared-libs', 'spark-jenkins-shared-lib']) _
+
+def urmUrl="https://${ArtifactoryConstants.ARTIFACTORY_NAME}/artifactory/sw-spark-maven"
+
+pipeline {
+    agent none
+
+    options {
+        ansiColor('xterm')
+        timestamps()
+        timeout(time: 240, unit: 'MINUTES')
+        buildDiscarder(logRotator(numToKeepStr: '10'))
+    }
+
+    parameters {
+        string(name: 'OVERWRITE_PARAMS', defaultValue: '',
+            description: 'parameters format XXX_VER=xxx;YYY_VER=yyy;')
+        string(name: 'REF', defaultValue: 'branch-0.2', description: 'Commit to build')
+    }
+
+    environment {
+        JENKINS_ROOT = 'jenkins'
+        TEST_SCRIPT = '$JENKINS_ROOT/spark-tests.sh'
+        LIBCUDF_KERNEL_CACHE_PATH='/tmp/.cudf'
+        ARTIFACTORY_NAME = "${ArtifactoryConstants.ARTIFACTORY_NAME}"
+        URM_URL = "${urmUrl}"
+        MVN_URM_MIRROR='-s jenkins/settings.xml -P mirror-apache-to-urm'
+    }
+
+    stages {
+        stage('IT on 3.0.2-SNAPSHOT') {
+            agent { label 'docker-gpu' }
+            environment {SPARK_VER='3.0.2-SNAPSHOT'}
+            steps {
+                script {
+                    def CUDA_NAME=sh(returnStdout: true,
+                        script: '. jenkins/version-def.sh>&2 && echo -n $CUDA_CLASSIFIER | sed "s/-/./g"')
+                    def IMAGE_NAME="$ARTIFACTORY_NAME/sw-spark-docker/plugin:it-centos7-$CUDA_NAME"
+                    def CUDA_VER="$CUDA_NAME" - "cuda"
+                    sh "docker pull $IMAGE_NAME"
+                    docker.image(IMAGE_NAME).inside("--runtime=nvidia -v ${HOME}/.zinc:${HOME}/.zinc:rw") {
+                        sh "bash $TEST_SCRIPT"
+                    }
+                }
+            }
+        }
+    } // end of stages
+
+    post {
+        always {
+            script {
+                def status = "failed"
+                if (currentBuild.currentResult == "SUCCESS") {
+                    status = "success"
+                    slack("#rapidsai-spark-cicd", "Success", color: "#33CC33")
+                }
+                else {
+                    slack("#rapidsai-spark-cicd", "Failed", color: "#FF0000")
+                }
+            }
+            echo 'Pipeline finished!'
+        }
+    }
+} // end of pipeline
+
+void slack(Map params = [:], String channel, String message) {
+    Map defaultParams = [
+        color: "#000000",
+        baseUrl: "${SparkConstants.SLACK_API_ENDPOINT}",
+        tokenCredentialId: "slack_token"
+    ]
+
+    params["channel"] = channel
+    params["message"] = "${BUILD_URL}\n" + message
+
+    slackSend(defaultParams << params)
+}
From cc4a53c491d2e18fd03b952585b37df96601f8c2 Mon Sep 17 00:00:00 2001
From: Thomas Graves
Date: Thu, 20 Aug 2020 15:58:14 -0500
Subject: [PATCH 7/7] Change docs to use bullet list

---
 docs/testing.md | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/docs/testing.md b/docs/testing.md
index 8e63736cb33..4bed801188e 100644
--- a/docs/testing.md
+++ b/docs/testing.md
@@ -42,8 +42,10 @@ They generally follow TPCH but are not guaranteed to be the same.
 Unit tests exist in the tests directory. This is unconventional and is done so we can run the tests
 on the final shaded version of the plugin. It also helps with how we collect code coverage. You can
 run the unit tests against different versions of Spark using the different profiles. The
-default version runs against Spark 3.0.0, `-Pspark301tests` runs against Spark 3.0.1, `-Pspark302tests`
-runs against Spark 3.0.2, and `-Pspark310tests` runs unit tests against Spark 3.1.0.
+default version runs against Spark 3.0.0; to run against other versions use one of the following profiles:
+ - `-Pspark301tests` (Spark 3.0.1)
+ - `-Pspark302tests` (Spark 3.0.2)
+ - `-Pspark310tests` (Spark 3.1.0)
 
 ## Integration tests
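
Taken together, the series gives Spark 3.0.2 the same plumbing that 3.0.0 and 3.0.1 already have: a provider that claims the version, shims built on top of the 3.0.1 ones, and a shuffle manager whose class name is what lands in the getting-started list. A short sketch tying the pieces together (illustrative only; it relies just on the classes added in patch 1):

```scala
import com.nvidia.spark.rapids.shims.spark302.{Spark302Shims, SparkShimServiceProvider}

val provider = new SparkShimServiceProvider()
assert(provider.matchesVersion("3.0.2-SNAPSHOT"))

// The shims name the class users set as spark.shuffle.manager:
val shims = new Spark302Shims()
println(shims.getRapidsShuffleManagerClass)
// -> com.nvidia.spark.rapids.spark302.RapidsShuffleManager
```
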