Skip to content

Support split non-AST-able join condition for BroadcastNestedLoopJoin #7068

Support split non-AST-able join condition for BroadcastNestedLoopJoin

Support split non-AST-able join condition for BroadcastNestedLoopJoin #7068

# Copyright (c) 2022-2023, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# A workflow to run mvn verify check
name: mvn[compile,RAT,scalastyle,docgen]
on:
pull_request:
types: [opened, synchronize, reopened]
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
env:
COMMON_MVN_FLAGS: >
-Ddist.jar.compress=false
-DskipTests
-Dskip
jobs:
get-shim-versions-from-dist:
runs-on: ubuntu-latest
outputs:
defaultSparkVersion: ${{ steps.allShimVersionsStep.outputs.defaultSparkVersion }}
sparkTailVersions: ${{ steps.allShimVersionsStep.outputs.tailVersions }}
sparkJDKVersions: ${{ steps.allShimVersionsStep.outputs.jdkVersions }}
scala213Versions: ${{ steps.allShimVersionsStep.outputs.scala213Versions }}
steps:
- uses: actions/checkout@v3 # refs/pull/:prNumber/merge
- name: Setup Java and Maven Env
uses: actions/setup-java@v3
with:
distribution: adopt
java-version: 8
- name: all shim versions
id: allShimVersionsStep
run: |
set -x
. jenkins/version-def.sh
svArrBodyNoSnapshot=$(printf ",{\"spark-version\":\"%s\",\"isSnapshot\":false}" "${SPARK_SHIM_VERSIONS_NOSNAPSHOTS_TAIL[@]}")
svArrBodyNoSnapshot=${svArrBodyNoSnapshot:1}
# get private artifact version
privateVer=$(mvn help:evaluate -q -pl dist -Dexpression=spark-rapids-private.version -DforceStdout)
# do not add empty snapshot versions or when private version is released one (does not include snapshot shims)
if [[ ${#SPARK_SHIM_VERSIONS_SNAPSHOTS_ONLY[@]} -gt 0 && $privateVer == *"-SNAPSHOT" ]]; then
svArrBodySnapshot=$(printf ",{\"spark-version\":\"%s\",\"isSnapshot\":true}" "${SPARK_SHIM_VERSIONS_SNAPSHOTS_ONLY[@]}")
svArrBodySnapshot=${svArrBodySnapshot:1}
svJsonStr=$(printf {\"include\":[%s]} $svArrBodyNoSnapshot,$svArrBodySnapshot)
else
svJsonStr=$(printf {\"include\":[%s]} $svArrBodyNoSnapshot)
fi
echo "tailVersions=$svJsonStr" >> $GITHUB_OUTPUT
# default version
echo "defaultSparkVersion=${SPARK_BASE_SHIM_VERSION}" >> $GITHUB_OUTPUT
jdkHeadVersionArrBody=$(printf ",{\"spark-version\":\"%s\",\"java-version\":8}" "${SPARK_BASE_SHIM_VERSION}")
# jdk11
jdk11VersionArrBody=$(printf ",{\"spark-version\":\"%s\",\"java-version\":11}" "${SPARK_SHIM_VERSIONS_JDK11[@]}")
# jdk17
jdk17VersionArrBody=$(printf ",{\"spark-version\":\"%s\",\"java-version\":17}" "${SPARK_SHIM_VERSIONS_JDK17[@]}")
# jdk
jdkVersionArrBody=$jdkHeadVersionArrBody$jdk11VersionArrBody$jdk17VersionArrBody
jdkVersionArrBody=${jdkVersionArrBody:1}
jdkVersionJsonStr=$(printf {\"include\":[%s]} $jdkVersionArrBody)
echo "jdkVersions=$jdkVersionJsonStr" >> $GITHUB_OUTPUT
SCALA_BINARY_VER=2.13
. jenkins/version-def.sh
svArrBodyNoSnapshot=$(printf ",{\"spark-version\":\"%s\",\"isSnapshot\":false}" "${SPARK_SHIM_VERSIONS_NOSNAPSHOTS[@]}")
svArrBodyNoSnapshot=${svArrBodyNoSnapshot:1}
# get private artifact version
privateVer=$(mvn help:evaluate -q -pl dist -Dexpression=spark-rapids-private.version -DforceStdout)
# do not add empty snapshot versions or when private version is released one (does not include snapshot shims)
if [[ ${#SPARK_SHIM_VERSIONS_SNAPSHOTS_ONLY[@]} -gt 0 && $privateVer == *"-SNAPSHOT" ]]; then
svArrBodySnapshot=$(printf ",{\"spark-version\":\"%s\",\"isSnapshot\":true}" "${SPARK_SHIM_VERSIONS_SNAPSHOTS_ONLY[@]}")
svArrBodySnapshot=${svArrBodySnapshot:1}
svJsonStr=$(printf {\"include\":[%s]} $svArrBodyNoSnapshot,$svArrBodySnapshot)
else
svJsonStr=$(printf {\"include\":[%s]} $svArrBodyNoSnapshot)
fi
echo "scala213Versions=$svJsonStr" >> $GITHUB_OUTPUT
package-tests:
needs: get-shim-versions-from-dist
continue-on-error: ${{ matrix.isSnapshot }}
strategy:
matrix: ${{ fromJSON(needs.get-shim-versions-from-dist.outputs.sparkTailVersions) }}
fail-fast: false
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3 # refs/pull/:prNumber/merge
- name: Setup Java and Maven Env
uses: actions/setup-java@v3
with:
distribution: adopt
java-version: 8
- name: check runtime before tests
run: |
env | grep JAVA
java -version && mvn --version && echo "ENV JAVA_HOME: $JAVA_HOME, PATH: $PATH"
- name: package tests check
run: |
# https://github.com/NVIDIA/spark-rapids/issues/8847
# specify expected versions
export JAVA_HOME=${JAVA_HOME_8_X64}
export PATH=${JAVA_HOME}/bin:${PATH}
java -version && mvn --version && echo "ENV JAVA_HOME: $JAVA_HOME, PATH: $PATH"
# test command
mvn -Dmaven.wagon.http.retryHandler.count=3 -B package \
-pl integration_tests,tests -am \
-P 'individual,pre-merge' \
-Dbuildver=${{ matrix.spark-version }} \
-Dmaven.scalastyle.skip=true \
-Drat.skip=true \
${{ env.COMMON_MVN_FLAGS }}
package-tests-scala213:
needs: get-shim-versions-from-dist
continue-on-error: ${{ matrix.isSnapshot }}
strategy:
matrix: ${{ fromJSON(needs.get-shim-versions-from-dist.outputs.scala213Versions) }}
fail-fast: false
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3 # refs/pull/:prNumber/merge
- name: Setup Java and Maven Env
uses: actions/setup-java@v3
with:
distribution: adopt
java-version: 8
- name: check runtime before tests
run: |
env | grep JAVA
java -version && mvn --version && echo "ENV JAVA_HOME: $JAVA_HOME, PATH: $PATH"
- name: package tests check
run: |
# https://github.com/NVIDIA/spark-rapids/issues/8847
# specify expected versions
export JAVA_HOME=${JAVA_HOME_8_X64}
export PATH=${JAVA_HOME}/bin:${PATH}
java -version && mvn --version && echo "ENV JAVA_HOME: $JAVA_HOME, PATH: $PATH"
# verify Scala 2.13 build files
./build/make-scala-version-build-files.sh 2.13
# verify git status
if ! git diff --exit-code 'scala2.13/*'; then
echo "Generated Scala 2.13 build files don't match what's in repository"
exit 1
fi
# change to Scala 2.13 Directory
cd scala2.13
# test command
mvn -Dmaven.wagon.http.retryHandler.count=3 -B package \
-pl integration_tests,tests -am \
-P 'individual,pre-merge' \
-Dbuildver=${{ matrix.spark-version }} \
-Dmaven.scalastyle.skip=true \
-Drat.skip=true \
${{ env.COMMON_MVN_FLAGS }}
verify-all-modules:
needs: get-shim-versions-from-dist
runs-on: ubuntu-latest
strategy:
matrix: ${{ fromJSON(needs.get-shim-versions-from-dist.outputs.sparkJDKVersions) }}
steps:
- uses: actions/checkout@v3 # refs/pull/:prNumber/merge
- name: Setup Java and Maven Env
uses: actions/setup-java@v3
with:
distribution: adopt
java-version: ${{ matrix.java-version }}
- name: check runtime before tests
run: |
env | grep JAVA
java -version && mvn --version && echo "ENV JAVA_HOME: $JAVA_HOME, PATH: $PATH"
- name: Build JDK
run: |
# https://github.com/NVIDIA/spark-rapids/issues/8847
# specify expected versions
export JAVA_HOME=${JAVA_HOME_${{ matrix.java-version }}_X64}
export PATH=${JAVA_HOME}/bin:${PATH}
java -version && mvn --version && echo "ENV JAVA_HOME: $JAVA_HOME, PATH: $PATH"
# test command
mvn -Dmaven.wagon.http.retryHandler.count=3 -B verify \
-P "individual,pre-merge" \
-Dbuildver=${{ matrix.spark-version }} \
${{ env.COMMON_MVN_FLAGS }}
install-modules:
needs: get-shim-versions-from-dist
runs-on: ubuntu-latest
strategy:
matrix:
maven-version: [3.6.3, 3.8.8, 3.9.3]
steps:
- uses: actions/checkout@v3 # refs/pull/:prNumber/merge
- name: Setup Java
uses: actions/setup-java@v3
with:
distribution: adopt
java-version: 11
- name: Setup Maven Wrapper
run: mvn wrapper:wrapper -Dmaven=${{ matrix.maven-version }}
- name: check runtime before tests
run: |
env | grep JAVA
java -version && mvn --version && echo "ENV JAVA_HOME: $JAVA_HOME, PATH: $PATH"
- name: Install with Maven ${{ matrix.maven-version }}
run: |
# https://github.com/NVIDIA/spark-rapids/issues/8847
# specify expected versions
export JAVA_HOME=${JAVA_HOME_11_X64}
export PATH=${JAVA_HOME}/bin:${PATH}
java -version && mvn --version && echo "ENV JAVA_HOME: $JAVA_HOME, PATH: $PATH"
# test command
./mvnw -Dmaven.wagon.http.retryHandler.count=3 -B install \
-P "individual,pre-merge" \
-Dbuildver=${{ needs.get-shim-versions-from-dist.outputs.defaultSparkVersion }} \
${{ env.COMMON_MVN_FLAGS }}