Skip to content

Commit

Permalink
Use dist/pom file as source of truth for spark versions (#6437)
Browse files Browse the repository at this point in the history
* get spark shim version from pom

Signed-off-by: YanxuanLiu <[email protected]>

* correct comments

Signed-off-by: YanxuanLiu <[email protected]>

* added switch control for version-def

Signed-off-by: YanxuanLiu <[email protected]>

* replace mvn command with profile var

Signed-off-by: YanxuanLiu <[email protected]>

* fix some its

Signed-off-by: YanxuanLiu <[email protected]>

* add func to get versions from pom

Signed-off-by: YanxuanLiu <[email protected]>

* get version in version-def

Signed-off-by: YanxuanLiu <[email protected]>

* get version from version-def in yml

Signed-off-by: YanxuanLiu <[email protected]>

* Update .github/workflows/mvn-verify-check.yml

fix the path

Co-authored-by: Gera Shegalov <[email protected]>

* add premergeUT to pom

Signed-off-by: YanxuanLiu <[email protected]>

* fix nits

Signed-off-by: YanxuanLiu <[email protected]>

* fix bug: call version-def in yml

Signed-off-by: YanxuanLiu <[email protected]>

* fix bug

Signed-off-by: YanxuanLiu <[email protected]>

* fix nits and optimize env var names

Signed-off-by: YanxuanLiu <[email protected]>

* set headVersion with env

Signed-off-by: YanxuanLiu <[email protected]>

* fix yml bug

Signed-off-by: YanxuanLiu <[email protected]>

* add echo for debug

Signed-off-by: YanxuanLiu <[email protected]>

* add echo in yml for testing

Signed-off-by: YanxuanLiu <[email protected]>

* fix bug

Signed-off-by: YanxuanLiu <[email protected]>

* add SCRIPT_PATH for wrapper call

Signed-off-by: YanxuanLiu <[email protected]>

* add comment

Signed-off-by: YanxuanLiu <[email protected]>

* add comment

Signed-off-by: YanxuanLiu <[email protected]>

* remove common and separate ut versions to two parts

Signed-off-by: YanxuanLiu <[email protected]>

* skip base version

Signed-off-by: YanxuanLiu <[email protected]>

* add comment

Signed-off-by: YanxuanLiu <[email protected]>

* added property for utf-8 cases

Signed-off-by: YanxuanLiu <[email protected]>

Signed-off-by: YanxuanLiu <[email protected]>
Co-authored-by: Gera Shegalov <[email protected]>
  • Loading branch information
YanxuanLiu and gerashegalov authored Sep 21, 2022
1 parent bc0aa04 commit 6d3f66c
Show file tree
Hide file tree
Showing 4 changed files with 124 additions and 30 deletions.
9 changes: 3 additions & 6 deletions .github/workflows/mvn-verify-check.yml
Original file line number Diff line number Diff line change
Expand Up @@ -42,14 +42,11 @@ jobs:
id: noSnapshotVersionsStep
run: |
set -x
noSnapshotVersionsStr=$(mvn -B help:evaluate -q -pl dist -PnoSnapshots -Dexpression=included_buildvers -DforceStdout)
noSnapshotVersionsStr=$(echo $noSnapshotVersionsStr)
noSnapshotVersionsArr=($(IFS=", "; echo $noSnapshotVersionsStr))
tailNoSnapshotVersionsArr=(${noSnapshotVersionsArr[@]:1})
svArrBody=$(printf ",{\"spark-version\":\"%s\"}" "${tailNoSnapshotVersionsArr[@]}")
. jenkins/version-def.sh
svArrBody=$(printf ",{\"spark-version\":\"%s\"}" "${SPARK_SHIM_VERSIONS_NOSNAPSHOTS_TAIL[@]}")
svArrBody=${svArrBody:1}
svJsonStr=$(printf {\"include\":[%s]} $svArrBody)
echo ::set-output name=headVersion::${noSnapshotVersionsArr[0]}
echo ::set-output name=headVersion::$SPARK_BASE_SHIM_VERSION
echo ::set-output name=tailVersions::$svJsonStr
package-aggregator:
Expand Down
39 changes: 39 additions & 0 deletions dist/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,21 @@
312db,
321db
</databricks.buildvers>
<!--
Build and run unit tests on one specific version for each sub-version (e.g. 311, 320, 330)
Base shim version (311 currently) should be covered in default mvn verify command of premerge script,
so base shim version is removed from the premergeUT list.
Separate the versions to two parts (premergeUT1, premergeUT2) for balancing the duration
-->
<premergeUT1.buildvers>
320
</premergeUT1.buildvers>
<premergeUT2.buildvers>
330
</premergeUT2.buildvers>
<premergeUTF8.buildvers>
320
</premergeUTF8.buildvers>
<dist.jar.name>${project.build.directory}/${project.build.finalName}-${cuda.version}.jar</dist.jar.name>
</properties>
<profiles>
Expand All @@ -78,6 +93,30 @@
</included_buildvers>
</properties>
</profile>
<profile>
<id>premergeUT1</id>
<properties>
<included_buildvers>
${premergeUT1.buildvers}
</included_buildvers>
</properties>
</profile>
<profile>
<id>premergeUT2</id>
<properties>
<included_buildvers>
${premergeUT2.buildvers}
</included_buildvers>
</properties>
</profile>
<profile>
<id>premergeUTF8</id>
<properties>
<included_buildvers>
${premergeUTF8.buildvers}
</included_buildvers>
</properties>
</profile>
<profile>
<!--
https://spark.apache.org/versioning-policy.html
Expand Down
51 changes: 31 additions & 20 deletions jenkins/spark-premerge-build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ if [[ $# -eq 1 ]]; then
BUILD_TYPE=$1

elif [[ $# -gt 1 ]]; then
echo "ERROR: too many parameters are provided"
>&2 echo "ERROR: too many parameters are provided"
exit 1
fi

Expand All @@ -36,28 +36,36 @@ mvn_verify() {
BASE_REF=$(git --no-pager log --oneline -1 | awk '{ print $NF }')
# file size check for pull request. The size of a committed file should be less than 1.5MiB
pre-commit run check-added-large-files --from-ref $BASE_REF --to-ref HEAD

# build the Spark 2.x explain jar
env -u SPARK_HOME $MVN_CMD -B $MVN_URM_MIRROR -Dbuildver=24X clean install -DskipTests

MVN_INSTALL_CMD="env -u SPARK_HOME $MVN_CMD -U -B $MVN_URM_MIRROR clean install $MVN_BUILD_ARGS -DskipTests -pl aggregator -am"
# build all the versions but only run unit tests on one 3.1.X version (base version covers this), one 3.2.X and one 3.3.X version.
# All others shims test should be covered in nightly pipelines
$MVN_INSTALL_CMD -DskipTests -Dbuildver=321cdh
$MVN_INSTALL_CMD -DskipTests -Dbuildver=312
$MVN_INSTALL_CMD -DskipTests -Dbuildver=313
[[ $BUILD_MAINTENANCE_VERSION_SNAPSHOTS == "true" ]] && $MVN_INSTALL_CMD -Dbuildver=314

$MVN_INSTALL_CMD -DskipTests -Dbuildver=320

for version in "${SPARK_SHIM_VERSIONS_SNAPSHOTS_TAIL[@]}"
do
echo "Spark version: $version"
# build and run unit tests on one specific version for each sub-version (e.g. 320, 330) except base version
# separate the versions to two ci stages (mvn_verify, ci_2) for balancing the duration
if [[ "${SPARK_SHIM_VERSIONS_PREMERGE_UT_1[@]}" =~ "$version" ]]; then
env -u SPARK_HOME $MVN_CMD -U -B $MVN_URM_MIRROR -Dbuildver=$version clean install $MVN_BUILD_ARGS \
-Dpytest.TEST_TAGS='' -pl '!tools'
# build only for nosnapshot versions
elif [[ "${SPARK_SHIM_VERSIONS_NOSNAPSHOTS_TAIL[@]}" =~ "$version" ]]; then
$MVN_INSTALL_CMD -DskipTests -Dbuildver=$version
# build only for snapshot versions
elif [[ $BUILD_MAINTENANCE_VERSION_SNAPSHOTS == "true" ]]; then
$MVN_INSTALL_CMD -Dbuildver=$version
fi
done

# enable UTF-8 for regular expression tests
env -u SPARK_HOME LC_ALL="en_US.UTF-8" $MVN_CMD $MVN_URM_MIRROR -Dbuildver=320 test $MVN_BUILD_ARGS \
-Dpytest.TEST_TAGS='' -pl '!tools' \
-DwildcardSuites=com.nvidia.spark.rapids.ConditionalsSuite,com.nvidia.spark.rapids.RegularExpressionSuite,com.nvidia.spark.rapids.RegularExpressionTranspilerSuite
$MVN_INSTALL_CMD -DskipTests -Dbuildver=321
$MVN_INSTALL_CMD -DskipTests -Dbuildver=322
env -u SPARK_HOME $MVN_CMD -U -B $MVN_URM_MIRROR -Dbuildver=330 clean install $MVN_BUILD_ARGS \
-Dpytest.TEST_TAGS='' -pl '!tools'
[[ $BUILD_MAINTENANCE_VERSION_SNAPSHOTS == "true" ]] && $MVN_INSTALL_CMD -DskipTests -Dbuildver=331
for version in "${SPARK_SHIM_VERSIONS_PREMERGE_UTF8[@]}"
do
env -u SPARK_HOME LC_ALL="en_US.UTF-8" $MVN_CMD $MVN_URM_MIRROR -Dbuildver=$version test $MVN_BUILD_ARGS \
-Dpytest.TEST_TAGS='' -pl '!tools' \
-DwildcardSuites=com.nvidia.spark.rapids.ConditionalsSuite,com.nvidia.spark.rapids.RegularExpressionSuite,com.nvidia.spark.rapids.RegularExpressionTranspilerSuite
done

# TODO: move it to BUILD_MAINTENANCE_VERSION_SNAPSHOTS when we resolve all spark340 build issues
[[ $BUILD_FEATURE_VERSION_SNAPSHOTS == "true" ]] && $MVN_INSTALL_CMD -DskipTests -Dbuildver=340

Expand Down Expand Up @@ -140,8 +148,11 @@ ci_2() {

# put some mvn tests here to balance durations of parallel stages
echo "Run mvn package..."
env -u SPARK_HOME $MVN_CMD -U -B $MVN_URM_MIRROR -Dbuildver=320 clean package $MVN_BUILD_ARGS \
-Dpytest.TEST_TAGS='' -pl '!tools'
for version in "${SPARK_SHIM_VERSIONS_PREMERGE_UT_2[@]}"
do
env -u SPARK_HOME $MVN_CMD -U -B $MVN_URM_MIRROR -Dbuildver=$version clean package $MVN_BUILD_ARGS \
-Dpytest.TEST_TAGS='' -pl '!tools'
done
}


Expand Down
55 changes: 51 additions & 4 deletions jenkins/version-def.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,18 @@

set -e

# PHASE_TYPE: CICD phase at which the script is called, to specify Spark shim versions.
# regular: noSnapshots + snapshots
# pre-release: noSnapshots only
PHASE_TYPE=regular

if [[ $# -eq 1 ]]; then
PHASE_TYPE=$1
elif [[ $# -gt 1 ]]; then
>&2 echo "ERROR: too many parameters are provided"
exit 1
fi

# Split abc=123 from $OVERWRITE_PARAMS
# $OVERWRITE_PARAMS pattern 'abc=123;def=456;'
PRE_IFS=$IFS
Expand Down Expand Up @@ -48,9 +60,44 @@ SPARK_REPO=${SPARK_REPO:-"$URM_URL"}
echo "CUDF_VER: $CUDF_VER, CUDA_CLASSIFIER: $CUDA_CLASSIFIER, PROJECT_VER: $PROJECT_VER \
SPARK_VER: $SPARK_VER, SCALA_BINARY_VER: $SCALA_BINARY_VER"

# Spark shim versions
# get Spark shim versions from pom
# Read the Spark shim version list (the `included_buildvers` property of the
# dist module) from the pom for a given Maven profile option.
# Globals:
#   PROFILE_OPT              (written) the profile option that was passed in
#   SPARK_SHIM_VERSIONS_STR  (written) the list collapsed onto one line,
#                            whitespace runs squeezed to a single space
#   SPARK_SHIM_VERSIONS_ARR  (written) the list split on commas/spaces;
#                            reset to empty when mvn fails
# Arguments:
#   $1 - Maven profile option, e.g. -Psnapshots (may be empty)
# Returns:
#   0 on success, 1 if the mvn invocation fails
function set_env_var_SPARK_SHIM_VERSIONS_ARR() {
    PROFILE_OPT=$1
    # Pin IFS for the duration of the call so that splitting and joining do
    # not depend on whatever IFS the sourcing script currently has in effect.
    local IFS=$' \t\n'
    local raw_versions
    local -a tokens=()

    # Declaration and assignment are kept separate so the mvn exit status is
    # not masked; the explicit check works even where `set -e` is inactive.
    # ${PROFILE_OPT:+...} drops the argument entirely when no option is given.
    raw_versions=$(mvn -B help:evaluate -q -pl dist ${PROFILE_OPT:+"$PROFILE_OPT"} \
        -Dexpression=included_buildvers -DforceStdout) || {
        >&2 echo "ERROR: failed to evaluate included_buildvers from pom (profile option: '$PROFILE_OPT')"
        # Do not leave a stale list from a previous call behind.
        SPARK_SHIM_VERSIONS_ARR=()
        return 1
    }

    # The property value spans several lines in the pom. Collapse it onto one
    # line without relying on unquoted expansion (which would also perform
    # pathname expansion). `read -d ''` consumes the whole input and returns
    # non-zero at EOF, hence the `|| true`.
    read -r -d '' -a tokens <<< "$raw_versions" || true
    SPARK_SHIM_VERSIONS_STR="${tokens[*]}"

    # Split "311, 312, 320" into individual versions.
    IFS=", " read -r -a SPARK_SHIM_VERSIONS_ARR <<< "$SPARK_SHIM_VERSIONS_STR"
}
# Psnapshots: snapshots + noSnapshots
set_env_var_SPARK_SHIM_VERSIONS_ARR -Psnapshots
SPARK_SHIM_VERSIONS_SNAPSHOTS=("${SPARK_SHIM_VERSIONS_ARR[@]}")
# PnoSnapshots: noSnapshots only
set_env_var_SPARK_SHIM_VERSIONS_ARR -PnoSnapshots
SPARK_SHIM_VERSIONS_NOSNAPSHOTS=("${SPARK_SHIM_VERSIONS_ARR[@]}")
# Spark shim versions list based on given profile option (snapshots or noSnapshots)
# Select the shim list for the requested CICD phase:
#   pre-release      -> noSnapshots only
#   regular (default) -> noSnapshots + snapshots (the -Psnapshots list)
case $PHASE_TYPE in
    pre-release)
        SPARK_SHIM_VERSIONS=("${SPARK_SHIM_VERSIONS_NOSNAPSHOTS[@]}")
        ;;

    *)
        SPARK_SHIM_VERSIONS=("${SPARK_SHIM_VERSIONS_SNAPSHOTS[@]}")
        ;;
esac
# base version
SPARK_BASE_SHIM_VERSION=${SPARK_SHIM_VERSIONS[0]}
# tail snapshots
SPARK_SHIM_VERSIONS_SNAPSHOTS_TAIL=("${SPARK_SHIM_VERSIONS_SNAPSHOTS[@]:1}")
# tail noSnapshots
SPARK_SHIM_VERSIONS_NOSNAPSHOTS_TAIL=("${SPARK_SHIM_VERSIONS_NOSNAPSHOTS[@]:1}")
# build and run unit tests on one specific version for each sub-version (e.g. 320, 330)
# separate the versions to two parts (premergeUT1, premergeUT2) for balancing the duration
set_env_var_SPARK_SHIM_VERSIONS_ARR -PpremergeUT1
SPARK_SHIM_VERSIONS_PREMERGE_UT_1=("${SPARK_SHIM_VERSIONS_ARR[@]}")
set_env_var_SPARK_SHIM_VERSIONS_ARR -PpremergeUT2
SPARK_SHIM_VERSIONS_PREMERGE_UT_2=("${SPARK_SHIM_VERSIONS_ARR[@]}")
# utf-8 cases
set_env_var_SPARK_SHIM_VERSIONS_ARR -PpremergeUTF8
SPARK_SHIM_VERSIONS_PREMERGE_UTF8=("${SPARK_SHIM_VERSIONS_ARR[@]}")

echo "SPARK_BASE_SHIM_VERSION: $SPARK_BASE_SHIM_VERSION"

0 comments on commit 6d3f66c

Please sign in to comment.