From acfd8f3be1dc14659fc9b7c5061c0c8dee25010b Mon Sep 17 00:00:00 2001 From: Alessandro Bellina Date: Thu, 30 Jul 2020 13:48:45 -0500 Subject: [PATCH] Udf compiler pom followup (#475) * Minor changes to udf compiler pom/docs Signed-off-by: Alessandro Bellina * Update config description for the udfCompiler Signed-off-by: Alessandro Bellina --- docs/configs.md | 2 +- .../com/nvidia/spark/rapids/RapidsConf.scala | 3 +- udf-compiler/pom.xml | 100 +++++++++--------- 3 files changed, 50 insertions(+), 55 deletions(-) diff --git a/docs/configs.md b/docs/configs.md index 1b34abe368b..460c1173f47 100644 --- a/docs/configs.md +++ b/docs/configs.md @@ -49,7 +49,6 @@ Name | Description | Default Value spark.rapids.sql.concurrentGpuTasks|Set the number of tasks that can execute concurrently per GPU. Tasks may temporarily block when the number of concurrent tasks in the executor exceeds this amount. Allowing too many concurrent tasks on the same GPU may lead to GPU out of memory errors.|1 spark.rapids.sql.csvTimestamps.enabled|When set to true, enables the CSV parser to read timestamps. The default output format for Spark includes a timezone at the end. Anything except the UTC timezone is not supported. Timestamps after 2038 and before 1902 are also not supported.|false spark.rapids.sql.enabled|Enable (true) or disable (false) sql operations on the GPU|true -spark.rapids.sql.udfCompiler.enabled|When set to true, all UDFs are compiled to Catalyst expressions by Catalyst Analyzer|false spark.rapids.sql.explain|Explain why some parts of a query were not placed on a GPU or not. Possible values are ALL: print everything, NONE: print nothing, NOT_ON_GPU: print only parts of a query that did not go on the GPU|NONE spark.rapids.sql.format.csv.enabled|When set to false disables all csv input and output acceleration. 
(only input is currently supported anyways)|true spark.rapids.sql.format.csv.read.enabled|When set to false disables csv input acceleration|true @@ -68,6 +67,7 @@ Name | Description | Default Value spark.rapids.sql.reader.batchSizeRows|Soft limit on the maximum number of rows the reader will read per batch. The orc and parquet readers will read row groups until this limit is met or exceeded. The limit is respected by the csv reader.|2147483647 spark.rapids.sql.replaceSortMergeJoin.enabled|Allow replacing sortMergeJoin with HashJoin|true spark.rapids.sql.shuffle.spillThreads|Number of threads used to spill shuffle data to disk in the background.|6 +spark.rapids.sql.udfCompiler.enabled|When set to true, Scala UDFs will be considered for compilation as Catalyst expressions|false spark.rapids.sql.variableFloatAgg.enabled|Spark assumes that all operations produce the exact same result each time. This is not true for some floating point aggregations, which can produce slightly different results on the GPU as the aggregation is done in parallel. 
This can enable those operations if you know the query is only computing it once.|false ## Supported GPU Operators and Fine Tuning diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala index 566b3f05987..ac4b35eb826 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala @@ -351,8 +351,7 @@ object RapidsConf { .createWithDefault(true) val UDF_COMPILER_ENABLED = conf("spark.rapids.sql.udfCompiler.enabled") - .doc("When set to true, all UDFs will be compiled to Catalyst expressions by Catalyst " + - "Analyzer.") + .doc("When set to true, Scala UDFs will be considered for compilation as Catalyst expressions") .booleanConf .createWithDefault(false) diff --git a/udf-compiler/pom.xml b/udf-compiler/pom.xml index 425828327ae..bcbf7ed7ebd 100644 --- a/udf-compiler/pom.xml +++ b/udf-compiler/pom.xml @@ -25,31 +25,32 @@ 0.2.0-SNAPSHOT com.nvidia - rapids-4-spark-udf + rapids-4-spark-udf_2.12 + RAPIDS Accelerator for Apache Spark Scala UDF Plugin + The RAPIDS Scala UDF plugin for Apache Spark 0.2.0-SNAPSHOT + + ai.rapids + cudf + ${cuda.version} + org.scala-lang scala-library - commons-logging - commons-logging - 1.1.1 + org.apache.spark + spark-sql_${scala.binary.version} org.apache.spark spark-sql_${scala.binary.version} + test-jar + test ${spark.version} - - org.apache.spark - spark-sql_${scala.binary.version} - test-jar - test - ${spark.version} - org.apache.spark spark-catalyst_${scala.binary.version} @@ -67,54 +68,49 @@ ${project.version} provided - - ai.rapids - cudf - ${cuda.version} - - - - ${project.build.directory}/extra-resources - true - - - ${project.basedir}/.. - META-INF - - - LICENSE - - + + + ${project.build.directory}/extra-resources + true + + + ${project.basedir}/.. 
+ META-INF + + + LICENSE + + - maven-antrun-plugin - - - copy-notice - - run - - process-resources - - - - - - - - - - - - - + maven-antrun-plugin + + + copy-notice + + run + + process-resources + + + + + + + + + + + + +