Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Spark 3.0.2 to Shim layer #596

Merged
merged 7 commits into from
Aug 21, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/get-started/getting-started.md
Original file line number Diff line number Diff line change
Expand Up @@ -425,6 +425,7 @@ simplify these settings in the near future). Choose the version of the shuffle m
that matches your Spark version. Currently we support
- Spark 3.0.0 (com.nvidia.spark.rapids.spark300.RapidsShuffleManager)
- Spark 3.0.1 (com.nvidia.spark.rapids.spark301.RapidsShuffleManager)
- Spark 3.0.2 (com.nvidia.spark.rapids.spark302.RapidsShuffleManager)
- Spark 3.1.0 (com.nvidia.spark.rapids.spark310.RapidsShuffleManager)

```shell
Expand Down
6 changes: 4 additions & 2 deletions docs/testing.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,10 @@ They generally follow TPCH but are not guaranteed to be the same.
Unit tests exist in the tests directory. This is unconventional and is done so we can run the tests
on the final shaded version of the plugin. It also helps with how we collect code coverage.
You can run the unit tests against different versions of Spark using the different profiles. The
default version runs again Spark 3.0.0, `-Pspark301tests` runs against Spark 3.0.1, and `-Pspark310tests`
runs unit tests against Spark 3.1.0.
default version runs against Spark 3.0.0; to run against other versions use one of the following profiles:
- `-Pspark301tests` (Spark 3.0.1)
- `-Pspark302tests` (Spark 3.0.2)
- `-Pspark310tests` (Spark 3.1.0)

## Integration tests

Expand Down
6 changes: 6 additions & 0 deletions integration_tests/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,12 @@
<spark.test.version>3.0.1-SNAPSHOT</spark.test.version>
</properties>
</profile>
<profile>
<id>spark302tests</id>
<properties>
<spark.test.version>3.0.2-SNAPSHOT</spark.test.version>
</properties>
</profile>
<profile>
<id>spark310tests</id>
<properties>
Expand Down
99 changes: 99 additions & 0 deletions jenkins/Jenkinsfile.302.integration
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
#!/usr/bin/env groovy
/*
* Copyright (c) 2020, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/**
*
* Jenkins file for running spark3.0.2 integration tests
*
*/

@Library(['shared-libs', 'spark-jenkins-shared-lib']) _

def urmUrl="https://${ArtifactoryConstants.ARTIFACTORY_NAME}/artifactory/sw-spark-maven"

// Declarative Jenkins pipeline: runs the plugin integration-test suite
// against a Spark 3.0.2-SNAPSHOT build inside a GPU-enabled docker image,
// then reports the result to Slack.
pipeline {
    agent none

    options {
        ansiColor('xterm')
        timestamps()
        // Hard stop after 4 hours so a hung test run cannot hold the agent.
        timeout(time: 240, unit: 'MINUTES')
        buildDiscarder(logRotator(numToKeepStr: '10'))
    }

    parameters {
        string(name: 'OVERWRITE_PARAMS', defaultValue: '',
            description: 'parameters format XXX_VER=xxx;YYY_VER=yyy;')
        string(name: 'REF', defaultValue: 'branch-0.2', description: 'Commit to build')
    }

    environment {
        JENKINS_ROOT = 'jenkins'
        TEST_SCRIPT = '$JENKINS_ROOT/spark-tests.sh'
        LIBCUDF_KERNEL_CACHE_PATH='/tmp/.cudf'
        ARTIFACTORY_NAME = "${ArtifactoryConstants.ARTIFACTORY_NAME}"
        URM_URL = "${urmUrl}"
        MVN_URM_MIRROR='-s jenkins/settings.xml -P mirror-apache-to-urm'
    }

    stages {
        stage('IT on 3.0.2-SNAPSHOT') {
            agent { label 'docker-gpu' }
            environment {SPARK_VER='3.0.2-SNAPSHOT'}
            steps {
                script {
                    // e.g. "cuda10-1" -> "cuda10.1" to match the docker tag scheme.
                    def CUDA_NAME=sh(returnStdout: true,
                        script: '. jenkins/version-def.sh>&2 && echo -n $CUDA_CLASSIFIER | sed "s/-/./g"')
                    def IMAGE_NAME="$ARTIFACTORY_NAME/sw-spark-docker/plugin:it-centos7-$CUDA_NAME"
                    // NOTE(review): CUDA_VER is computed but never referenced below —
                    // confirm whether spark-tests.sh needs it exported, else drop it.
                    def CUDA_VER="$CUDA_NAME" - "cuda"
                    sh "docker pull $IMAGE_NAME"
                    docker.image(IMAGE_NAME).inside("--runtime=nvidia -v ${HOME}/.zinc:${HOME}/.zinc:rw") {
                        sh "bash $TEST_SCRIPT"
                    }
                }
            }
        }
    } // end of stages
    post {
        always {
            script {
                // Removed the dead `status` local: it was assigned but never read.
                if (currentBuild.currentResult == "SUCCESS") {
                    slack("#rapidsai-spark-cicd", "Success", color: "#33CC33")
                }
                else {
                    slack("#rapidsai-spark-cicd", "Failed", color: "#FF0000")
                }
            }
            echo 'Pipeline finished!'
        }
    }
} // end of pipeline

/**
 * Posts a message to a Slack channel via the slackSend step.
 * Caller-supplied entries in the optional leading map (e.g. color:)
 * override the defaults; channel and message are always set last.
 */
void slack(Map params = [:], String channel, String message) {
    def merged = [
        color: "#000000",
        baseUrl: "${SparkConstants.SLACK_API_ENDPOINT}",
        tokenCredentialId: "slack_token"
    ]
    // Layer caller overrides on top of the defaults, then pin the
    // destination channel and the build-URL-prefixed message text.
    merged.putAll(params)
    merged["channel"] = channel
    merged["message"] = "${BUILD_URL}\n" + message

    slackSend(merged)
}
1 change: 1 addition & 0 deletions jenkins/spark-nightly-build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ set -ex
mvn -U -B -Pinclude-databricks clean deploy $MVN_URM_MIRROR -Dmaven.repo.local=$WORKSPACE/.m2
# Run unit tests against other spark versions
mvn -U -B -Pspark301tests test $MVN_URM_MIRROR -Dmaven.repo.local=$WORKSPACE/.m2
mvn -U -B -Pspark302tests test $MVN_URM_MIRROR -Dmaven.repo.local=$WORKSPACE/.m2
mvn -U -B -Pspark310tests test $MVN_URM_MIRROR -Dmaven.repo.local=$WORKSPACE/.m2

# Parse cudf and spark files from local mvn repo
Expand Down
1 change: 1 addition & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,7 @@
<slf4j.version>1.7.30</slf4j.version>
<spark300.version>3.0.0</spark300.version>
<spark301.version>3.0.1-SNAPSHOT</spark301.version>
<spark302.version>3.0.2-SNAPSHOT</spark302.version>
<spark310.version>3.1.0-SNAPSHOT</spark310.version>
</properties>

Expand Down
6 changes: 6 additions & 0 deletions shims/aggregator/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,12 @@
<version>${project.version}</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>com.nvidia</groupId>
<artifactId>rapids-4-spark-shims-spark302_${scala.binary.version}</artifactId>
<version>${project.version}</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>com.nvidia</groupId>
<artifactId>rapids-4-spark-shims-spark301_${scala.binary.version}</artifactId>
Expand Down
1 change: 1 addition & 0 deletions shims/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
<modules>
<module>spark300</module>
<module>spark301</module>
<module>spark302</module>
<module>spark310</module>
<module>aggregator</module>
</modules>
Expand Down
47 changes: 47 additions & 0 deletions shims/spark302/pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Copyright (c) 2020, NVIDIA CORPORATION.

  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License.
-->
<!-- Maven module for the Spark 3.0.2 shim layer. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <parent>
        <groupId>com.nvidia</groupId>
        <artifactId>rapids-4-spark-shims_2.12</artifactId>
        <version>0.2.0-SNAPSHOT</version>
        <relativePath>../pom.xml</relativePath>
    </parent>
    <groupId>com.nvidia</groupId>
    <artifactId>rapids-4-spark-shims-spark302_2.12</artifactId>
    <name>RAPIDS Accelerator for Apache Spark SQL Plugin Spark 3.0.2 Shim</name>
    <description>The RAPIDS SQL plugin for Apache Spark 3.0.2 Shim</description>
    <version>0.2.0-SNAPSHOT</version>

    <dependencies>
        <!-- The 3.0.2 shim extends the 3.0.1 shim classes, so it depends on
             that shim module at compile scope. -->
        <dependency>
            <groupId>com.nvidia</groupId>
            <artifactId>rapids-4-spark-shims-spark301_${scala.binary.version}</artifactId>
            <version>${project.version}</version>
        </dependency>
        <!-- Spark itself is provided by the runtime cluster, never bundled. -->
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-sql_${scala.binary.version}</artifactId>
            <version>${spark302.version}</version>
            <scope>provided</scope>
        </dependency>
    </dependencies>
</project>
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
com.nvidia.spark.rapids.shims.spark302.SparkShimServiceProvider
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
/*
* Copyright (c) 2020, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.nvidia.spark.rapids.shims.spark302

import com.nvidia.spark.rapids.ShimVersion
import com.nvidia.spark.rapids.shims.spark301.Spark301Shims
import com.nvidia.spark.rapids.spark302.RapidsShuffleManager

/**
 * Shim implementation for Spark 3.0.2.
 *
 * Inherits all behavior from the 3.0.1 shim and overrides only the
 * reported version and the shuffle-manager class name.
 */
class Spark302Shims extends Spark301Shims {

  /** The version this shim answers to (3.0.2). */
  override def getSparkShimVersion: ShimVersion = SparkShimServiceProvider.VERSION

  /** Canonical name of the 3.0.2-specific RapidsShuffleManager. */
  override def getRapidsShuffleManagerClass: String =
    classOf[RapidsShuffleManager].getCanonicalName
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
/*
* Copyright (c) 2020, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.nvidia.spark.rapids.shims.spark302

import com.nvidia.spark.rapids.{SparkShims, SparkShimVersion}

object SparkShimServiceProvider {
  /** The exact Spark version this shim targets. */
  val VERSION = SparkShimVersion(3, 0, 2)
  /** Accept both the release string and its -SNAPSHOT variant. */
  val VERSIONNAMES = Seq("", "-SNAPSHOT").map(suffix => s"$VERSION$suffix")
}

/** Service-provider entry point that selects the Spark 3.0.2 shim. */
class SparkShimServiceProvider extends com.nvidia.spark.rapids.SparkShimServiceProvider {

  /** True when `version` is "3.0.2" or "3.0.2-SNAPSHOT". */
  def matchesVersion(version: String): Boolean =
    SparkShimServiceProvider.VERSIONNAMES.contains(version)

  /** Instantiates the shim for this Spark version. */
  def buildShim: SparkShims = new Spark302Shims()
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
/*
* Copyright (c) 2020, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.nvidia.spark.rapids.spark302

import org.apache.spark.SparkConf
import org.apache.spark.sql.rapids.shims.spark300.RapidsShuffleInternalManager

/** A shuffle manager optimized for the RAPIDS Plugin for Apache Spark. */
/** A shuffle manager optimized for the RAPIDS Plugin for Apache Spark. */
// Declared in the version-specific spark302 package so the shim layer can
// reference it by canonical class name; it forwards its constructor args
// unchanged to the shared Spark 3.0.0 internal implementation.
// NOTE(review): assumes the spark300 RapidsShuffleInternalManager is
// compatible with Spark 3.0.2's shuffle API — confirm when 3.0.2 diverges.
sealed class RapidsShuffleManager(
    conf: SparkConf,
    isDriver: Boolean) extends RapidsShuffleInternalManager(conf, isDriver) {
}
6 changes: 6 additions & 0 deletions tests/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,12 @@
<spark.test.version>3.0.1-SNAPSHOT</spark.test.version>
</properties>
</profile>
<profile>
<id>spark302tests</id>
<properties>
<spark.test.version>3.0.2-SNAPSHOT</spark.test.version>
</properties>
</profile>
<profile>
<id>spark310tests</id>
<properties>
Expand Down