From 0509509f17813aa9ca8ae6d7e7bd0fac26651e7c Mon Sep 17 00:00:00 2001
From: NvTimLiu <50287591+NvTimLiu@users.noreply.github.com>
Date: Thu, 15 Apr 2021 21:40:27 +0800
Subject: [PATCH] Add dynamic Spark configuration for Databricks (#2116)

* Add dynamic Spark confs for Databricks

We need a way to set Spark configurations dynamically for Databricks,
e.g., when we test cuDF sonatype release jars we need to disable the
cudf-rapids version match by adding
"--conf spark.rapids.cudfVersionOverride=true", or enable/disable AQE,
or anything else.

By adding the parameter spark_conf="--conf spark.xxx.xxx=xxx --conf ......"
to the script 'run-tests.py', we can dynamically pass whatever confs we
need to the Databricks cluster.

Signed-off-by: Tim Liu

* Comma-separated list of Spark configurations

Signed-off-by: Tim Liu

* Add a comment to make the '-f' format clear

* Add a comment to make the '-f' format clear

* Fix typo

* Add '--conf' if the SPARK_CONF is not empty
---
 jenkins/databricks/params.py    | 13 +++++++++----
 jenkins/databricks/run-tests.py |  2 +-
 jenkins/databricks/test.sh      | 21 ++++++++++++++++++---
 3 files changed, 28 insertions(+), 8 deletions(-)

diff --git a/jenkins/databricks/params.py b/jenkins/databricks/params.py
index e96acded2b4..ff815735a69 100644
--- a/jenkins/databricks/params.py
+++ b/jenkins/databricks/params.py
@@ -26,19 +26,21 @@
 clusterid = ''
 build_profiles = 'databricks,!snapshot-shims'
 jar_path = ''
+# `spark_conf` can take a comma-separated list of Spark configurations, e.g., 'spark.foo=1,spark.bar=2,...'
+spark_conf = ''
 
 try:
-    opts, args = getopt.getopt(sys.argv[1:], 'hw:t:c:p:l:d:z:m:v:b:j:',
-                               ['workspace=', 'token=', 'clusterid=', 'private=', 'localscript=', 'dest=', 'sparktgz=', 'basesparkpomversion=', 'buildprofiles=', 'jarpath'])
+    opts, args = getopt.getopt(sys.argv[1:], 'hw:t:c:p:l:d:z:m:v:b:j:f:',
+                               ['workspace=', 'token=', 'clusterid=', 'private=', 'localscript=', 'dest=', 'sparktgz=', 'basesparkpomversion=', 'buildprofiles=', 'jarpath', 'sparkconf'])
 except getopt.GetoptError:
     print(
-        'run-tests.py -s -t -c -p -l -d -z -v -b -j ')
+        'run-tests.py -s -t -c -p -l -d -z -v -b -j -f ')
     sys.exit(2)
 
 for opt, arg in opts:
     if opt == '-h':
         print(
-            'run-tests.py -s -t -c -p -n -l -d , -z -v -b ')
+            'run-tests.py -s -t -c -p -n -l -d , -z -v -b -f ')
         sys.exit()
     elif opt in ('-w', '--workspace'):
         workspace = arg
@@ -60,6 +62,8 @@
         build_profiles = arg
     elif opt in ('-j', '--jarpath'):
         jar_path = arg
+    elif opt in ('-f', '--sparkconf'):
+        spark_conf = arg
 
 print('-w is ' + workspace)
 print('-c is ' + clusterid)
@@ -69,3 +73,4 @@
 print('-z is ' + source_tgz)
 print('-v is ' + base_spark_pom_version)
 print('-j is ' + jar_path)
+print('-f is ' + spark_conf)
diff --git a/jenkins/databricks/run-tests.py b/jenkins/databricks/run-tests.py
index 3e33d7215ab..79e7e629aea 100644
--- a/jenkins/databricks/run-tests.py
+++ b/jenkins/databricks/run-tests.py
@@ -35,7 +35,7 @@ def main():
     print("rsync command: %s" % rsync_command)
     subprocess.check_call(rsync_command, shell = True)
 
-    ssh_command = "ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null ubuntu@%s -p 2200 -i %s %s %s 2>&1 | tee testout; if [ `echo ${PIPESTATUS[0]}` -ne 0 ]; then false; else true; fi" % (master_addr, params.private_key_file, params.script_dest, params.jar_path)
+    ssh_command = "ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null ubuntu@%s -p 2200 -i %s %s %s %s 2>&1 | tee testout; if [ `echo ${PIPESTATUS[0]}` -ne 0 ]; then false; else true; fi" % (master_addr, params.private_key_file, params.script_dest, params.jar_path, params.spark_conf)
     print("ssh command: %s" % ssh_command)
     subprocess.check_call(ssh_command, shell = True)
 
diff --git a/jenkins/databricks/test.sh b/jenkins/databricks/test.sh
index 8df0104d9da..1e7e1afb96f 100755
--- a/jenkins/databricks/test.sh
+++ b/jenkins/databricks/test.sh
@@ -15,9 +15,10 @@
 # limitations under the License.
 #
 
-set -e
+set -ex
 
 LOCAL_JAR_PATH=$1
+SPARK_CONF=$2
 
 # tests
 export PATH=/databricks/conda/envs/databricks-ml-gpu/bin:/databricks/conda/condabin:$PATH
@@ -38,6 +39,20 @@ CUDF_UDF_TEST_ARGS="--conf spark.python.daemon.module=rapids.daemon_databricks \
                     --conf spark.rapids.python.memory.gpu.allocFraction=0.1 \
                     --conf spark.rapids.python.concurrentPythonWorkers=2"
 
+## Convert 'spark.foo=1,spark.bar=2,...' to 'export PYSP_TEST_spark_foo=1; export PYSP_TEST_spark_bar=2; ...'
+if [ -n "$SPARK_CONF" ]; then
+    CONF_LIST=${SPARK_CONF//','/' '}
+    for CONF in ${CONF_LIST}; do
+        KEY=${CONF%%=*}
+        VALUE=${CONF#*=}
+        ## run_pyspark_from_build.sh requires the Spark configs in the form 'export PYSP_TEST_spark_foo=1'
+        export PYSP_TEST_${KEY//'.'/'_'}=$VALUE
+    done
+
+    ## Convert 'spark.foo=1,spark.bar=2,...' to '--conf spark.foo=1 --conf spark.bar=2 --conf ...'
+    SPARK_CONF="--conf ${SPARK_CONF//','/' --conf '}"
+fi
+
 TEST_TYPE="nightly"
 if [ -d "$LOCAL_JAR_PATH" ]; then
     ## Run tests with jars in the LOCAL_JAR_PATH dir downloading from the denpedency repo
@@ -45,7 +60,7 @@ if [ -d "$LOCAL_JAR_PATH" ]; then
 
     ## Run cudf-udf tests
    CUDF_UDF_TEST_ARGS="$CUDF_UDF_TEST_ARGS --conf spark.executorEnv.PYTHONPATH=`ls $LOCAL_JAR_PATH/rapids-4-spark_*.jar | grep -v 'tests.jar'`"
-    LOCAL_JAR_PATH=$LOCAL_JAR_PATH SPARK_SUBMIT_FLAGS=$CUDF_UDF_TEST_ARGS TEST_PARALLEL=1 \
+    LOCAL_JAR_PATH=$LOCAL_JAR_PATH SPARK_SUBMIT_FLAGS="$SPARK_CONF $CUDF_UDF_TEST_ARGS" TEST_PARALLEL=1 \
         bash $LOCAL_JAR_PATH/integration_tests/run_pyspark_from_build.sh --runtime_env="databricks" -m "cudf_udf" --cudf_udf --test_type=$TEST_TYPE
 else
     ## Run tests with jars building from the spark-rapids source code
@@ -53,6 +68,6 @@ else
 
     ## Run cudf-udf tests
    CUDF_UDF_TEST_ARGS="$CUDF_UDF_TEST_ARGS --conf spark.executorEnv.PYTHONPATH=`ls /home/ubuntu/spark-rapids/dist/target/rapids-4-spark_*.jar | grep -v 'tests.jar'`"
-    SPARK_SUBMIT_FLAGS=$CUDF_UDF_TEST_ARGS TEST_PARALLEL=1 \
+    SPARK_SUBMIT_FLAGS="$SPARK_CONF $CUDF_UDF_TEST_ARGS" TEST_PARALLEL=1 \
        bash /home/ubuntu/spark-rapids/integration_tests/run_pyspark_from_build.sh --runtime_env="databricks" -m "cudf_udf" --cudf_udf --test_type=$TEST_TYPE
 fi
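
For reference, a hypothetical invocation of run-tests.py with the new -f/--sparkconf option. The option letters come from the patch above; the workspace, token, cluster id, key file, tgz, version, and jar path values below are placeholders, not taken from this patch, and the two Spark configs echo the examples in the commit message:

    # Pass a comma-separated list of Spark configs to the Databricks test run;
    # run-tests.py forwards the list over ssh and test.sh receives it as its
    # second positional argument ($2).
    python jenkins/databricks/run-tests.py \
        -w "$WORKSPACE" -t "$TOKEN" -c "$CLUSTER_ID" \
        -p ~/.ssh/id_rsa -l jenkins/databricks/test.sh -d /home/ubuntu/test.sh \
        -z spark-3.0.1-bin-hadoop3.2.tgz -v 3.0.1 -j "$JAR_PATH" \
        -f 'spark.rapids.cudfVersionOverride=true,spark.sql.adaptive.enabled=false'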
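
A minimal, standalone sketch of the conversion the new test.sh block performs (the two config entries are made-up examples, and the echo at the end is only for illustration):

    SPARK_CONF='spark.rapids.cudfVersionOverride=true,spark.sql.adaptive.enabled=false'

    # Each comma-separated entry becomes an exported PYSP_TEST_* variable
    # (dots replaced with underscores), which run_pyspark_from_build.sh reads.
    for CONF in ${SPARK_CONF//','/' '}; do
        KEY=${CONF%%=*}     # part before the first '='
        VALUE=${CONF#*=}    # part after the first '='
        export PYSP_TEST_${KEY//'.'/'_'}=$VALUE
    done

    # The same list rewritten as spark-submit flags:
    #   --conf spark.rapids.cudfVersionOverride=true --conf spark.sql.adaptive.enabled=false
    SPARK_CONF="--conf ${SPARK_CONF//','/' --conf '}"
    echo "$SPARK_CONF"

Note that this simple comma split assumes the configuration values themselves contain no commas or spaces.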