From eac020af34d2be85d4d84f2f8b02ba168d3658d3 Mon Sep 17 00:00:00 2001
From: Thomas Graves
Date: Tue, 28 Jul 2020 14:38:55 -0500
Subject: [PATCH 1/2] remove unneeded files and use rm -rf

---
 jenkins/Jenkinsfile.databricksnightly |   2 +-
 jenkins/Jenkinsfile.databricksrelease | 110 ------------------------
 jenkins/databricks/dbimports.patch    | 118 --------------------------
 3 files changed, 1 insertion(+), 229 deletions(-)
 delete mode 100644 jenkins/Jenkinsfile.databricksrelease
 delete mode 100644 jenkins/databricks/dbimports.patch

diff --git a/jenkins/Jenkinsfile.databricksnightly b/jenkins/Jenkinsfile.databricksnightly
index 6bc6a8bec38..47eef0692ea 100644
--- a/jenkins/Jenkinsfile.databricksnightly
+++ b/jenkins/Jenkinsfile.databricksnightly
@@ -76,7 +76,7 @@ pipeline {
       steps {
         script {
           sshagent(credentials : ['svcngcc_pubpriv']) {
-            sh "rm spark-rapids-ci.tgz"
+            sh "rm -rf spark-rapids-ci.tgz"
             sh "tar -zcvf spark-rapids-ci.tgz *"
             sh "python3.6 ./jenkins/databricks/run-tests.py -c $CLUSTER_ID -z ./spark-rapids-ci.tgz -t $DATABRICKS_TOKEN -p /home/svcngcc/.ssh/id_rsa -l ./jenkins/databricks/build.sh -j $CI_RAPIDS_JAR -b $DATABRICKS_VERSION -k $SPARK_VERSION -a $SCALA_VERSION -f $CUDF_VERSION -u $CUDA_VERSION -m $CI_CUDF_JAR"
             sh "./jenkins/databricks/deploy.sh"
diff --git a/jenkins/Jenkinsfile.databricksrelease b/jenkins/Jenkinsfile.databricksrelease
deleted file mode 100644
index 647bab74099..00000000000
--- a/jenkins/Jenkinsfile.databricksrelease
+++ /dev/null
@@ -1,110 +0,0 @@
-#!/usr/local/env groovy
-/*
- * Copyright (c) 2020, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
-*
-* Jenkinsfile for building and deploy rapids-plugin for Databricks to public repo
-*
-*/
-@Library('shared-libs') _
-
-def urmUrl="https://${ArtifactoryConstants.ARTIFACTORY_NAME}/artifactory/sw-spark-maven"
-
-pipeline {
-  agent {
-    dockerfile {
-      label 'docker-deploy||docker-gpu'
-      filename 'Dockerfile.ubuntu16'
-      dir "jenkins"
-      args '--runtime=nvidia -v ${HOME}/.m2:${HOME}/.m2:rw \
-        -v ${HOME}/.zinc:${HOME}/.zinc:rw'
-    }
-  }
-
-  options {
-    ansiColor('xterm')
-    timeout(time: 120, unit: 'MINUTES')
-    buildDiscarder(logRotator(numToKeepStr: '10'))
-  }
-
-  parameters {
-    string(name: 'DEPLOY_TO', defaultValue: 'https://oss.sonatype.org/service/local/staging/deploy/maven2',
-        description: 'The repo URL where to deploy the artifacts')
-    string(name: 'DATABRICKS_VERSION',
-        defaultValue: '0.2.0-SNAPSHOT', description: 'Version to set')
-    string(name: 'CUDF_VERSION',
-        defaultValue: '0.15-SNAPSHOT', description: 'Cudf version to use')
-    string(name: 'CUDA_VERSION',
-        defaultValue: 'cuda10-1', description: 'cuda version to use')
-    string(name: 'REF', defaultValue: 'branch-0.2', description: 'Commit to build')
-  }
-
-  environment {
-    JENKINS_ROOT='jenkins'
-    LIBCUDF_KERNEL_CACHE_PATH='/tmp/.cudf'
-    MVN_MIRROR='-s jenkins/settings.xml -P mirror-apache-to-urm'
-    URM_CREDS = credentials("svcngcc_artifactory")
-    DATABRICKS_TOKEN = credentials("SPARK_DATABRICKS_TOKEN")
-    DIST_PL='dist'
-    SQL_PL='sql-plugin'
-    SCALA_VERSION = '2.12'
-    SPARK_VERSION = '3.0.0-databricks'
-    CI_RAPIDS_JAR = 'rapids-4-spark_2.12-0.1-SNAPSHOT-ci.jar'
-    CI_CUDF_JAR = 'cudf-0.14-cuda10-1.jar'
-    LOCAL_URL = "${localUrl}"
-  }
-
-  stages {
-    stage('Build') {
-      steps {
-        script {
-          sshagent(credentials : ['svcngcc_pubpriv']) {
-            sh "rm spark-rapids-ci.tgz"
-            sh "tar -zcvf spark-rapids-ci.tgz * || true"
-            sh "python3.6 ./jenkins/databricks/run-tests.py -z ./spark-rapids-ci.tgz -t $DATABRICKS_TOKEN -p /home/svcngcc/.ssh/id_rsa -l ./jenkins/databricks/build.sh -j $CI_RAPIDS_JAR -b $DATABRICKS_VERSION -k $SPARK_VERSION -a $SCALA_VERSION -f $CUDF_VERSION -u $CUDA_VERSION -m $CI_CUDF_JAR"
-          }
-        }
-      }
-    }
-    stage("Deploy") {
-      environment {
-        SERVER_ID='ossrh'
-        SERVER_URL="${DEPLOY_TO}"
-        GPG_PASSPHRASE=credentials('SPARK_RAPIDS_GPG_PASSPHRASE')
-        GPG_FILE=credentials('SPARK_RAPIDS_GPG_PRIVATE_KEY')
-        SONATYPE=credentials('SPARK_SONATYPE_USERPASS')
-        GNUPGHOME="${WORKSPACE}/.gnupg"
-      }
-      steps {
-        script {
-          sh 'rm -rf $GNUPGHOME'
-          sh 'gpg --import $GPG_FILE'
-          retry (3) {
-            sh "bash $JENKINS_ROOT/deploy.sh true true"
-          }
-        }
-      }
-    }
-    stage('Cleanup') {
-      steps {
-        script {
-          sh "python3.6 ./jenkins/databricks/shutdown.py -t $DATABRICKS_TOKEN"
-        }
-      }
-    }
-  } // End of stages
-} // end of pipeline
diff --git a/jenkins/databricks/dbimports.patch b/jenkins/databricks/dbimports.patch
deleted file mode 100644
index db44ecf0e35..00000000000
--- a/jenkins/databricks/dbimports.patch
+++ /dev/null
@@ -1,118 +0,0 @@
-diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuHashJoin.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuHashJoin.scala
-index f0aaec3..eafba2a 100644
---- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuHashJoin.scala
-+++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuHashJoin.scala
-@@ -19,8 +19,9 @@ import ai.rapids.cudf.{NvtxColor, Table}
- 
- import org.apache.spark.TaskContext
- import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression}
-+import org.apache.spark.sql.catalyst.optimizer.{BuildLeft, BuildRight}
- import org.apache.spark.sql.catalyst.plans.{ExistenceJoin, FullOuter, InnerLike, JoinType, LeftAnti, LeftExistence, LeftOuter, LeftSemi, RightOuter}
--import org.apache.spark.sql.execution.joins.{BuildLeft, BuildRight, HashJoin}
-+import org.apache.spark.sql.execution.joins.HashJoin
- import org.apache.spark.sql.execution.metric.SQLMetric
- import org.apache.spark.sql.vectorized.{ColumnarBatch, ColumnVector}
- 
-diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuShuffledHashJoinExec.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuShuffledHashJoinExec.scala
-index 7ae310b..3ebde77 100644
---- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuShuffledHashJoinExec.scala
-+++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuShuffledHashJoinExec.scala
-@@ -22,10 +22,11 @@ import org.apache.spark.TaskContext
- import org.apache.spark.rdd.RDD
- import org.apache.spark.sql.catalyst.InternalRow
- import org.apache.spark.sql.catalyst.expressions.Expression
-+import org.apache.spark.sql.catalyst.optimizer.{BuildLeft, BuildRight, BuildSide}
- import org.apache.spark.sql.catalyst.plans.JoinType
- import org.apache.spark.sql.catalyst.plans.physical.{Distribution, HashClusteredDistribution}
- import org.apache.spark.sql.execution.{BinaryExecNode, SparkPlan}
--import org.apache.spark.sql.execution.joins.{BuildLeft, BuildRight, BuildSide, ShuffledHashJoinExec}
-+import org.apache.spark.sql.execution.joins.ShuffledHashJoinExec
- import org.apache.spark.sql.execution.metric.{SQLMetric, SQLMetrics}
- import org.apache.spark.sql.vectorized.ColumnarBatch
- 
-diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuSortMergeJoinExec.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuSortMergeJoinExec.scala
-index af7e607..6edf950 100644
---- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuSortMergeJoinExec.scala
-+++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuSortMergeJoinExec.scala
-@@ -17,9 +17,10 @@
- 
- package com.nvidia.spark.rapids
- 
- import org.apache.spark.internal.Logging
-+import org.apache.spark.sql.catalyst.optimizer.{BuildLeft, BuildRight}
- import org.apache.spark.sql.catalyst.plans.{ExistenceJoin, FullOuter, InnerLike, JoinType, LeftAnti, LeftOuter, LeftSemi, RightOuter}
- import org.apache.spark.sql.execution.SortExec
--import org.apache.spark.sql.execution.joins.{BuildLeft, BuildRight, SortMergeJoinExec}
-+import org.apache.spark.sql.execution.joins.SortMergeJoinExec
- 
- class GpuSortMergeJoinMeta(
-     join: SortMergeJoinExec,
-diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsMeta.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsMeta.scala
-index 834ec51..646ccda 100644
---- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsMeta.scala
-+++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsMeta.scala
-@@ -22,12 +22,13 @@ import com.nvidia.spark.rapids.GpuOverrides.isStringLit
- 
- import org.apache.spark.sql.catalyst.expressions.{BinaryExpression, ComplexTypeMergingExpression, Expression, String2TrimExpression, TernaryExpression, UnaryExpression}
- import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateFunction
-+import org.apache.spark.sql.catalyst.optimizer.{BuildLeft, BuildRight}
- import org.apache.spark.sql.catalyst.plans.physical.Partitioning
- import org.apache.spark.sql.connector.read.Scan
- import org.apache.spark.sql.execution.SparkPlan
- import org.apache.spark.sql.execution.command.DataWritingCommand
- import org.apache.spark.sql.execution.exchange.ShuffleExchangeExec
--import org.apache.spark.sql.execution.joins.{BroadcastHashJoinExec, BuildLeft, BuildRight, ShuffledHashJoinExec, SortMergeJoinExec}
-+import org.apache.spark.sql.execution.joins.{BroadcastHashJoinExec, ShuffledHashJoinExec, SortMergeJoinExec}
- import org.apache.spark.sql.types.{CalendarIntervalType, DataType, DataTypes, StringType}
- 
- trait ConfKeysAndIncompat {
-diff --git a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuCartesianProductExec.scala b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuCartesianProductExec.scala
-index 4c8c540..fb6dc06 100644
---- a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuCartesianProductExec.scala
-+++ b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuCartesianProductExec.scala
-@@ -27,8 +27,8 @@ import org.apache.spark.rdd.RDD
- import org.apache.spark.serializer.Serializer
- import org.apache.spark.sql.catalyst.InternalRow
- import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression}
-+import org.apache.spark.sql.catalyst.optimizer.BuildLeft
- import org.apache.spark.sql.execution.{BinaryExecNode, ExplainUtils, SparkPlan}
--import org.apache.spark.sql.execution.joins.BuildLeft
- import org.apache.spark.sql.execution.metric.{SQLMetric, SQLMetrics}
- import org.apache.spark.sql.rapids.execution.GpuBroadcastNestedLoopJoinExec
- import org.apache.spark.sql.vectorized.{ColumnarBatch, ColumnVector}
-diff --git a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/execution/GpuBroadcastHashJoinExec.scala b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/execution/GpuBroadcastHashJoinExec.scala
-index ac444d1..14a8c6e 100644
---- a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/execution/GpuBroadcastHashJoinExec.scala
-+++ b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/execution/GpuBroadcastHashJoinExec.scala
-@@ -22,6 +22,7 @@ import com.nvidia.spark.rapids.GpuMetricNames._
- import org.apache.spark.rdd.RDD
- import org.apache.spark.sql.catalyst.InternalRow
- import org.apache.spark.sql.catalyst.expressions.Expression
-+import org.apache.spark.sql.catalyst.optimizer.{BuildLeft, BuildRight, BuildSide}
- import org.apache.spark.sql.catalyst.plans.JoinType
- import org.apache.spark.sql.catalyst.plans.physical.{BroadcastDistribution, Distribution, UnspecifiedDistribution}
- import org.apache.spark.sql.execution.{BinaryExecNode, SparkPlan}
-diff --git a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/execution/GpuBroadcastNestedLoopJoinExec.scala b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/execution/GpuBroadcastNestedLoopJoinExec.scala
-index c120444..16c318a 100644
---- a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/execution/GpuBroadcastNestedLoopJoinExec.scala
-+++ b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/execution/GpuBroadcastNestedLoopJoinExec.scala
-@@ -23,11 +23,12 @@ import com.nvidia.spark.rapids.GpuMetricNames.{NUM_OUTPUT_BATCHES, NUM_OUTPUT_RO
- import org.apache.spark.rdd.RDD
- import org.apache.spark.sql.catalyst.InternalRow
- import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression}
-+import org.apache.spark.sql.catalyst.optimizer.{BuildLeft, BuildRight, BuildSide}
- import org.apache.spark.sql.catalyst.plans.{Cross, ExistenceJoin, FullOuter, Inner, InnerLike, JoinType, LeftExistence, LeftOuter, RightOuter}
- import org.apache.spark.sql.catalyst.plans.physical.{BroadcastDistribution, Distribution, IdentityBroadcastMode, UnspecifiedDistribution}
- import org.apache.spark.sql.execution.{BinaryExecNode, SparkPlan}
- import org.apache.spark.sql.execution.exchange.ReusedExchangeExec
--import org.apache.spark.sql.execution.joins.{BroadcastNestedLoopJoinExec, BuildLeft, BuildRight, BuildSide}
-+import org.apache.spark.sql.execution.joins.{BroadcastNestedLoopJoinExec}
- import org.apache.spark.sql.execution.metric.{SQLMetric, SQLMetrics}
- import org.apache.spark.sql.vectorized.ColumnarBatch
- 
-@@ -222,4 +223,4 @@ case class GpuBroadcastNestedLoopJoinExec(
-       }
-     }
-   }
--}
-\ No newline at end of file
-+}

From 65c20be0a88e517efffa0833ec380478021d9801 Mon Sep 17 00:00:00 2001
From: Thomas Graves
Date: Tue, 28 Jul 2020 14:48:24 -0500
Subject: [PATCH 2/2] Fix scalastyle for databricks shim

---
 .../spark/rapids/shims/spark300db/Spark300dbShims.scala | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/shims/spark300db/src/main/scala/com/nvidia/spark/rapids/shims/spark300db/Spark300dbShims.scala b/shims/spark300db/src/main/scala/com/nvidia/spark/rapids/shims/spark300db/Spark300dbShims.scala
index fcf42aff1de..5895739e37a 100644
--- a/shims/spark300db/src/main/scala/com/nvidia/spark/rapids/shims/spark300db/Spark300dbShims.scala
+++ b/shims/spark300db/src/main/scala/com/nvidia/spark/rapids/shims/spark300db/Spark300dbShims.scala
@@ -70,7 +70,7 @@ class Spark300dbShims extends Spark300Shims {
 
   override def getExecs: Map[Class[_ <: SparkPlan], ExecRule[_ <: SparkPlan]] = {
     Seq(
-    GpuOverrides.exec[FileSourceScanExec](
+      GpuOverrides.exec[FileSourceScanExec](
         "Reading data from files, often from Hive tables",
         (fsse, conf, p, r) => new SparkPlanMeta[FileSourceScanExec](fsse, conf, p, r) {
           // partition filters and data filters are not run on the GPU
@@ -104,7 +104,7 @@ class Spark300dbShims extends Spark300Shims {
         (join, conf, p, r) => new GpuBroadcastHashJoinMeta(join, conf, p, r)),
      GpuOverrides.exec[ShuffledHashJoinExec](
        "Implementation of join using hashed shuffled data",
-        (join, conf, p, r) => new GpuShuffledHashJoinMeta(join, conf, p, r)),
+        (join, conf, p, r) => new GpuShuffledHashJoinMeta(join, conf, p, r))
    ).map(r => (r.getClassFor.asSubclass(classOf[SparkPlan]), r)).toMap
  }