From 0141f432847f947add7a348133213448f3404a1e Mon Sep 17 00:00:00 2001
From: Sameer Raheja
Date: Mon, 15 Mar 2021 11:35:20 -0700
Subject: [PATCH] Update documentation to use cudf version 0.18.1 (#1934)

Signed-off-by: Sameer Raheja
---
 api_validation/README.md                                     | 2 +-
 docs/additional-functionality/cache-serializer.md            | 2 +-
 docs/configs.md                                              | 2 +-
 docs/demo/Databricks/generate-init-script.ipynb              | 2 +-
 docs/download.md                                             | 6 +++---
 docs/get-started/Dockerfile.cuda                             | 2 +-
 docs/get-started/getting-started-on-prem.md                  | 4 ++--
 integration_tests/README.md                                  | 4 ++--
 .../src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala  | 2 +-
 9 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/api_validation/README.md b/api_validation/README.md
index 25bee6d650c..c6667ef0dca 100644
--- a/api_validation/README.md
+++ b/api_validation/README.md
@@ -17,7 +17,7 @@ It requires cudf, rapids-4-spark and spark jars.
 
 ```
 cd api_validation
-// To run validation script on all version of Spark(3.0.0, 3.0.1 and 3.1.0-SNAPSHOT)
+// To run the validation script on all versions of Spark (3.0.0, 3.0.1 and 3.1.1)
 sh auditAllVersions.sh
 
 // To run script on particular version we can use profile(spark300, spark301 and spark311)
diff --git a/docs/additional-functionality/cache-serializer.md b/docs/additional-functionality/cache-serializer.md
index 4a91597ef6f..08f4c695a91 100644
--- a/docs/additional-functionality/cache-serializer.md
+++ b/docs/additional-functionality/cache-serializer.md
@@ -15,7 +15,7 @@ nav_order: 2
 utilize disk space to spill over. To read more about what storage levels are available
 look at `StorageLevel.scala` in Spark.
-Starting in Spark 3.1.0 users can add their own cache serializer, if they desire, by
+Starting in Spark 3.1.1, users can add their own cache serializer, if they desire, by
 setting the `spark.sql.cache.serializer` configuration. This is a static configuration
 that is set once for the duration of a Spark application which means that you can only
 set the conf before starting a Spark application and cannot be changed for that
 application's Spark
diff --git a/docs/configs.md b/docs/configs.md
index 34d9b9a4d96..6c6bbb2554e 100644
--- a/docs/configs.md
+++ b/docs/configs.md
@@ -10,7 +10,7 @@ The following is the list of options that `rapids-plugin-4-spark` supports.
 On startup use: `--conf [conf key]=[conf value]`. For example:
 
 ```
-${SPARK_HOME}/bin/spark --jars 'rapids-4-spark_2.12-0.4.0.jar,cudf-0.18-cuda10-1.jar' \
+${SPARK_HOME}/bin/spark --jars 'rapids-4-spark_2.12-0.4.0.jar,cudf-0.18.1-cuda10-1.jar' \
 --conf spark.plugins=com.nvidia.spark.SQLPlugin \
 --conf spark.rapids.sql.incompatibleOps.enabled=true
 ```
diff --git a/docs/demo/Databricks/generate-init-script.ipynb b/docs/demo/Databricks/generate-init-script.ipynb
index ea79022702f..5672f3cdb54 100644
--- a/docs/demo/Databricks/generate-init-script.ipynb
+++ b/docs/demo/Databricks/generate-init-script.ipynb
@@ -1 +1 @@
-{"cells":[{"cell_type":"code","source":["dbutils.fs.mkdirs(\"dbfs:/databricks/init_scripts/\")\n \ndbutils.fs.put(\"/databricks/init_scripts/init.sh\",\"\"\"\n#!/bin/bash\nsudo wget -O /databricks/jars/rapids-4-spark_2.12-0.4.0.jar https://oss.sonatype.org/content/repositories/staging/com/nvidia/rapids-4-spark_2.12/0.4.0/rapids-4-spark_2.12-0.4.0.jar\nsudo wget -O /databricks/jars/cudf-0.18-cuda10-1.jar https://oss.sonatype.org/content/repositories/staging/ai/rapids/cudf/0.18/cudf-0.18-cuda10-1.jar\"\"\", True)"],"metadata":{},"outputs":[],"execution_count":1},{"cell_type":"code","source":["%sh\ncd ../../dbfs/databricks/init_scripts\npwd\nls -ltr\ncat init.sh"],"metadata":{},"outputs":[],"execution_count":2},{"cell_type":"code","source":[""],"metadata":{},"outputs":[],"execution_count":3}],"metadata":{"name":"generate-init-script","notebookId":2645746662301564},"nbformat":4,"nbformat_minor":0}
+{"cells":[{"cell_type":"code","source":["dbutils.fs.mkdirs(\"dbfs:/databricks/init_scripts/\")\n \ndbutils.fs.put(\"/databricks/init_scripts/init.sh\",\"\"\"\n#!/bin/bash\nsudo wget -O /databricks/jars/rapids-4-spark_2.12-0.4.0.jar https://oss.sonatype.org/content/repositories/staging/com/nvidia/rapids-4-spark_2.12/0.4.0/rapids-4-spark_2.12-0.4.0.jar\nsudo wget -O /databricks/jars/cudf-0.18.1-cuda10-1.jar https://oss.sonatype.org/content/repositories/staging/ai/rapids/cudf/0.18.1/cudf-0.18.1-cuda10-1.jar\"\"\", True)"],"metadata":{},"outputs":[],"execution_count":1},{"cell_type":"code","source":["%sh\ncd ../../dbfs/databricks/init_scripts\npwd\nls -ltr\ncat init.sh"],"metadata":{},"outputs":[],"execution_count":2},{"cell_type":"code","source":[""],"metadata":{},"outputs":[],"execution_count":3}],"metadata":{"name":"generate-init-script","notebookId":2645746662301564},"nbformat":4,"nbformat_minor":0}
diff --git a/docs/download.md b/docs/download.md
index f743eddfb38..36f4f121d39 100644
--- a/docs/download.md
+++ b/docs/download.md
@@ -47,9 +47,9 @@ Software Requirements:
 
 ### Download v0.4.0
 * [RAPIDS Spark Package](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/0.4.0/rapids-4-spark_2.12-0.4.0.jar)
-* [cuDF 11.0 Package](https://repo1.maven.org/maven2/ai/rapids/cudf/0.18/cudf-0.18-cuda11.jar)
-* [cuDF 10.2 Package](https://repo1.maven.org/maven2/ai/rapids/cudf/0.18/cudf-0.18-cuda10-2.jar)
-* [cuDF 10.1 Package](https://repo1.maven.org/maven2/ai/rapids/cudf/0.18/cudf-0.18-cuda10-1.jar)
+* [cuDF 11.0 Package](https://repo1.maven.org/maven2/ai/rapids/cudf/0.18.1/cudf-0.18.1-cuda11.jar)
+* [cuDF 10.2 Package](https://repo1.maven.org/maven2/ai/rapids/cudf/0.18.1/cudf-0.18.1-cuda10-2.jar)
+* [cuDF 10.1 Package](https://repo1.maven.org/maven2/ai/rapids/cudf/0.18.1/cudf-0.18.1-cuda10-1.jar)
 
 ## Release v0.3.0
 This release includes additional performance improvements, including
diff --git a/docs/get-started/Dockerfile.cuda b/docs/get-started/Dockerfile.cuda
index 6e5a3688bd0..846e298e22d 100644
--- a/docs/get-started/Dockerfile.cuda
+++ b/docs/get-started/Dockerfile.cuda
@@ -52,7 +52,7 @@ COPY spark-3.0.1-bin-hadoop3.2/examples /opt/spark/examples
 COPY spark-3.0.1-bin-hadoop3.2/kubernetes/tests /opt/spark/tests
 COPY spark-3.0.1-bin-hadoop3.2/data /opt/spark/data
 
-COPY cudf-0.18-cuda10-1.jar /opt/sparkRapidsPlugin
+COPY cudf-0.18.1-cuda10-1.jar /opt/sparkRapidsPlugin
 COPY rapids-4-spark_2.12-0.4.0.jar /opt/sparkRapidsPlugin
 COPY getGpusResources.sh /opt/sparkRapidsPlugin
 
diff --git a/docs/get-started/getting-started-on-prem.md b/docs/get-started/getting-started-on-prem.md
index 2bf9ee40cb5..f2ff97a5f30 100644
--- a/docs/get-started/getting-started-on-prem.md
+++ b/docs/get-started/getting-started-on-prem.md
@@ -55,7 +55,7 @@ CUDA and will not run on other versions. The jars use a maven classifier to keep
 - CUDA 11.0 => classifier cuda11
 
 For example, here is a sample version of the jars and cudf with CUDA 10.1 support:
-- cudf-0.18-cuda10-1.jar
+- cudf-0.18.1-cuda10-1.jar
 - rapids-4-spark_2.12-0.4.0.jar
 
 
@@ -63,7 +63,7 @@ For simplicity export the location to these jars. This example assumes the sampl
 been placed in the `/opt/sparkRapidsPlugin` directory:
 ```shell
 export SPARK_RAPIDS_DIR=/opt/sparkRapidsPlugin
-export SPARK_CUDF_JAR=${SPARK_RAPIDS_DIR}/cudf-0.18-cuda10-1.jar
+export SPARK_CUDF_JAR=${SPARK_RAPIDS_DIR}/cudf-0.18.1-cuda10-1.jar
 export SPARK_RAPIDS_PLUGIN_JAR=${SPARK_RAPIDS_DIR}/rapids-4-spark_2.12-0.4.0.jar
 ```
 
diff --git a/integration_tests/README.md b/integration_tests/README.md
index 7b8ec0501b5..ca38aa4dfa5 100644
--- a/integration_tests/README.md
+++ b/integration_tests/README.md
@@ -131,7 +131,7 @@ If you just want to verify the SQL replacement is working you will need to add t
 example assumes CUDA 10.1 is being used.
 
 ```
-$SPARK_HOME/bin/spark-submit --jars "rapids-4-spark_2.12-0.4.0.jar,rapids-4-spark-udf-examples_2.12-0.4.0.jar,cudf-0.18-cuda10-1.jar" ./runtests.py
+$SPARK_HOME/bin/spark-submit --jars "rapids-4-spark_2.12-0.4.0.jar,rapids-4-spark-udf-examples_2.12-0.4.0.jar,cudf-0.18.1-cuda10-1.jar" ./runtests.py
 ```
 
 You don't have to enable the plugin for this to work, the test framework will do that for you.
@@ -192,7 +192,7 @@ To run cudf_udf tests, need following configuration changes:
 As an example, here is the `spark-submit` command with the cudf_udf parameter on CUDA 10.1:
 
 ```
-$SPARK_HOME/bin/spark-submit --jars "rapids-4-spark_2.12-0.4.0.jar,rapids-4-spark-udf-examples_2.12-0.4.0.jar,cudf-0.18-cuda10-1.jar,rapids-4-spark-tests_2.12-0.4.0.jar" --conf spark.rapids.memory.gpu.allocFraction=0.3 --conf spark.rapids.python.memory.gpu.allocFraction=0.3 --conf spark.rapids.python.concurrentPythonWorkers=2 --py-files "rapids-4-spark_2.12-0.4.0.jar" --conf spark.executorEnv.PYTHONPATH="rapids-4-spark_2.12-0.4.0.jar" ./runtests.py --cudf_udf
+$SPARK_HOME/bin/spark-submit --jars "rapids-4-spark_2.12-0.4.0.jar,rapids-4-spark-udf-examples_2.12-0.4.0.jar,cudf-0.18.1-cuda10-1.jar,rapids-4-spark-tests_2.12-0.4.0.jar" --conf spark.rapids.memory.gpu.allocFraction=0.3 --conf spark.rapids.python.memory.gpu.allocFraction=0.3 --conf spark.rapids.python.concurrentPythonWorkers=2 --py-files "rapids-4-spark_2.12-0.4.0.jar" --conf spark.executorEnv.PYTHONPATH="rapids-4-spark_2.12-0.4.0.jar" ./runtests.py --cudf_udf
 ```
 
 ## Writing tests
diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala
index f0c67120633..a2426dc0ed3 100644
--- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala
+++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala
@@ -916,7 +916,7 @@ object RapidsConf {
        |On startup use: `--conf [conf key]=[conf value]`. For example:
        |
        |```
-       |${SPARK_HOME}/bin/spark --jars 'rapids-4-spark_2.12-0.4.0.jar,cudf-0.18-cuda10-1.jar' \
+       |${SPARK_HOME}/bin/spark --jars 'rapids-4-spark_2.12-0.4.0.jar,cudf-0.18.1-cuda10-1.jar' \
        |--conf spark.plugins=com.nvidia.spark.SQLPlugin \
        |--conf spark.rapids.sql.incompatibleOps.enabled=true
        |```
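
After applying this patch, a quick way to confirm that the deployed cudf jar matches what the updated docs now reference is shown below. This is a minimal sketch, not part of the patch: it assumes the `/opt/sparkRapidsPlugin` layout from `getting-started-on-prem.md`, and that the cudf jar carries Maven's standard `pom.properties` under `META-INF/maven/`.

```shell
# Not part of the patch: sanity-check that the jars the updated docs reference
# are the ones actually on disk. Paths follow getting-started-on-prem.md.
export SPARK_RAPIDS_DIR=/opt/sparkRapidsPlugin
export SPARK_CUDF_JAR=${SPARK_RAPIDS_DIR}/cudf-0.18.1-cuda10-1.jar
export SPARK_RAPIDS_PLUGIN_JAR=${SPARK_RAPIDS_DIR}/rapids-4-spark_2.12-0.4.0.jar

# Fail fast if either jar is missing before launching spark-submit.
for jar in "$SPARK_CUDF_JAR" "$SPARK_RAPIDS_PLUGIN_JAR"; do
  [ -f "$jar" ] || { echo "missing: $jar" >&2; exit 1; }
done

# Maven-built jars normally embed their version in pom.properties (an
# assumption about how the cudf jar was packaged); expect version=0.18.1.
unzip -p "$SPARK_CUDF_JAR" 'META-INF/maven/ai.rapids/cudf/pom.properties' | grep '^version='
```

If the final `grep` prints `version=0.18.1`, the jar on disk agrees with the versions this patch writes into the documentation.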