Skip to content

Commit

Permalink
turn the nightly tests on
Browse files Browse the repository at this point in the history
Signed-off-by: Raza Jafri <[email protected]>
  • Loading branch information
razajafri committed Sep 3, 2021
1 parent addbb4b commit cafaa08
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 21 deletions.
4 changes: 2 additions & 2 deletions docs/additional-functionality/cache-serializer.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,11 +37,11 @@ nav_order: 2

To use this serializer please run Spark with the following conf.
```
spark-shell --conf spark.sql.cache.serializer=com.nvidia.spark.rapids.shims.spark311.ParquetCachedBatchSerializer"
spark-shell --conf spark.sql.cache.serializer=com.nvidia.spark.rapids.shims.v2.ParquetCachedBatchSerializer
```


## Supported Types
## Supported Types

All types are supported on the CPU. On the GPU, ArrayType, MapType and BinaryType are not
supported. If an unsupported type is encountered the Rapids Accelerator for Apache Spark will fall
Expand Down
22 changes: 9 additions & 13 deletions jenkins/databricks/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -59,17 +59,15 @@ IS_SPARK_311_OR_LATER=0
[[ "$(printf '%s\n' "3.1.1" "$BASE_SPARK_VER" | sort -V | head -n1)" = "3.1.1" ]] && IS_SPARK_311_OR_LATER=1

TEST_TYPE="nightly"
PCBS_CONF="com.nvidia.spark.rapids.shims.spark311.ParquetCachedBatchSerializer"
PCBS_CONF="com.nvidia.spark.rapids.shims.v2.ParquetCachedBatchSerializer"
if [ -d "$LOCAL_JAR_PATH" ]; then
## Run tests with jars in the LOCAL_JAR_PATH dir downloaded from the dependency repo
LOCAL_JAR_PATH=$LOCAL_JAR_PATH bash $LOCAL_JAR_PATH/integration_tests/run_pyspark_from_build.sh --runtime_env="databricks" --test_type=$TEST_TYPE

# Temporarily only run on Spark 3.1.1 (https://github.com/NVIDIA/spark-rapids/issues/3311)
## Run cache tests
#if [[ "$IS_SPARK_311_OR_LATER" -eq "1" ]]; then
# PYSP_TEST_spark_sql_cache_serializer=${PCBS_CONF} \
# LOCAL_JAR_PATH=$LOCAL_JAR_PATH bash $LOCAL_JAR_PATH/integration_tests/run_pyspark_from_build.sh --runtime_env="databricks" --test_type=$TEST_TYPE -k cache_test
#fi
if [[ "$IS_SPARK_311_OR_LATER" -eq "1" ]]; then
PYSP_TEST_spark_sql_cache_serializer=${PCBS_CONF} \
LOCAL_JAR_PATH=$LOCAL_JAR_PATH bash $LOCAL_JAR_PATH/integration_tests/run_pyspark_from_build.sh --runtime_env="databricks" --test_type=$TEST_TYPE -k cache_test
fi

## Run cudf-udf tests
CUDF_UDF_TEST_ARGS="$CUDF_UDF_TEST_ARGS --conf spark.executorEnv.PYTHONPATH=`ls $LOCAL_JAR_PATH/rapids-4-spark_*.jar | grep -v 'tests.jar'`"
Expand All @@ -80,12 +78,10 @@ else
## Run tests with jars building from the spark-rapids source code
bash /home/ubuntu/spark-rapids/integration_tests/run_pyspark_from_build.sh --runtime_env="databricks" --test_type=$TEST_TYPE

# Temporarily only run on Spark 3.1.1 (https://github.com/NVIDIA/spark-rapids/issues/3311)
## Run cache tests
#if [[ "$IS_SPARK_311_OR_LATER" -eq "1" ]]; then
# PYSP_TEST_spark_sql_cache_serializer=${PCBS_CONF} \
# bash /home/ubuntu/spark-rapids/integration_tests/run_pyspark_from_build.sh --runtime_env="databricks" --test_type=$TEST_TYPE -k cache_test
#fi
if [[ "$IS_SPARK_311_OR_LATER" -eq "1" ]]; then
PYSP_TEST_spark_sql_cache_serializer=${PCBS_CONF} \
bash /home/ubuntu/spark-rapids/integration_tests/run_pyspark_from_build.sh --runtime_env="databricks" --test_type=$TEST_TYPE -k cache_test
fi

## Run cudf-udf tests
CUDF_UDF_TEST_ARGS="$CUDF_UDF_TEST_ARGS --conf spark.executorEnv.PYTHONPATH=`ls /home/ubuntu/spark-rapids/dist/target/rapids-4-spark_*.jar | grep -v 'tests.jar'`"
Expand Down
9 changes: 3 additions & 6 deletions jenkins/spark-tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -66,9 +66,6 @@ IS_SPARK_311_OR_LATER=0
export SPARK_TASK_MAXFAILURES=1
[[ "$IS_SPARK_311_OR_LATER" -eq "0" ]] && SPARK_TASK_MAXFAILURES=4

IS_SPARK_311=0
[[ "$SPARK_VER" == "3.1.1" ]] && IS_SPARK_311=1

export PATH="$SPARK_HOME/bin:$SPARK_HOME/sbin:$PATH"

#stop and restart SPARK ETL
Expand Down Expand Up @@ -135,7 +132,7 @@ run_test() {

cache_serializer)
SPARK_SUBMIT_FLAGS="$BASE_SPARK_SUBMIT_ARGS $SEQ_CONF \
--conf spark.sql.cache.serializer=com.nvidia.spark.rapids.shims.spark311.ParquetCachedBatchSerializer" \
--conf spark.sql.cache.serializer=com.nvidia.spark.rapids.shims.v2.ParquetCachedBatchSerializer" \
./run_pyspark_from_build.sh -k cache_test
;;

Expand Down Expand Up @@ -175,8 +172,8 @@ fi
# cudf_udf_test
run_test cudf_udf_test

# Temporarily only run on Spark 3.1.1 (https://github.com/NVIDIA/spark-rapids/issues/3311)
if [[ "$IS_SPARK_311" -eq "1" ]]; then
# only run cache tests with our serializer in nightly test for Spark version >= 3.1.1
if [[ "$IS_SPARK_311_OR_LATER" -eq "1" ]]; then
run_test cache_serializer
fi

Expand Down

0 comments on commit cafaa08

Please sign in to comment.