diff --git a/.test-infra/jenkins/CommonTestProperties.groovy b/.test-infra/jenkins/CommonTestProperties.groovy index 9abc08b4fd98..01ad3b513b97 100644 --- a/.test-infra/jenkins/CommonTestProperties.groovy +++ b/.test-infra/jenkins/CommonTestProperties.groovy @@ -42,8 +42,8 @@ class CommonTestProperties { JAVA: [ DATAFLOW: ":runners:google-cloud-dataflow-java", TEST_DATAFLOW: ":runners:google-cloud-dataflow-java", - SPARK: ":runners:spark", - SPARK_STRUCTURED_STREAMING: ":runners:spark", + SPARK: ":runners:spark:2", + SPARK_STRUCTURED_STREAMING: ":runners:spark:2", FLINK: ":runners:flink:${CommonTestProperties.getFlinkVersion()}", DIRECT: ":runners:direct-java" ], diff --git a/.test-infra/jenkins/job_PostCommit_Java_Nexmark_Spark.groovy b/.test-infra/jenkins/job_PostCommit_Java_Nexmark_Spark.groovy index cab81c49a4a7..a41760ee92e0 100644 --- a/.test-infra/jenkins/job_PostCommit_Java_Nexmark_Spark.groovy +++ b/.test-infra/jenkins/job_PostCommit_Java_Nexmark_Spark.groovy @@ -44,7 +44,7 @@ NoPhraseTriggeringPostCommitBuilder.postCommitJob('beam_PostCommit_Java_Nexmark_ rootBuildScriptDir(commonJobProperties.checkoutDir) tasks(':sdks:java:testing:nexmark:run') commonJobProperties.setGradleSwitches(delegate) - switches('-Pnexmark.runner=":runners:spark"' + + switches('-Pnexmark.runner=":runners:spark:2"' + ' -Pnexmark.args="' + [ commonJobProperties.mapToArgString(nexmarkBigQueryArgs), @@ -62,7 +62,7 @@ NoPhraseTriggeringPostCommitBuilder.postCommitJob('beam_PostCommit_Java_Nexmark_ rootBuildScriptDir(commonJobProperties.checkoutDir) tasks(':sdks:java:testing:nexmark:run') commonJobProperties.setGradleSwitches(delegate) - switches('-Pnexmark.runner=":runners:spark"' + + switches('-Pnexmark.runner=":runners:spark:2"' + ' -Pnexmark.args="' + [ commonJobProperties.mapToArgString(nexmarkBigQueryArgs), @@ -81,7 +81,7 @@ NoPhraseTriggeringPostCommitBuilder.postCommitJob('beam_PostCommit_Java_Nexmark_ rootBuildScriptDir(commonJobProperties.checkoutDir) tasks(':sdks:java:testing:nexmark:run') commonJobProperties.setGradleSwitches(delegate) - switches('-Pnexmark.runner=":runners:spark"' + + switches('-Pnexmark.runner=":runners:spark:2"' + ' -Pnexmark.args="' + [ commonJobProperties.mapToArgString(nexmarkBigQueryArgs), @@ -101,7 +101,7 @@ NoPhraseTriggeringPostCommitBuilder.postCommitJob('beam_PostCommit_Java_Nexmark_ rootBuildScriptDir(commonJobProperties.checkoutDir) tasks(':sdks:java:testing:nexmark:run') commonJobProperties.setGradleSwitches(delegate) - switches('-Pnexmark.runner=":runners:spark"' + + switches('-Pnexmark.runner=":runners:spark:2"' + ' -Pnexmark.args="' + [ commonJobProperties.mapToArgString(nexmarkBigQueryArgs), diff --git a/.test-infra/jenkins/job_PostCommit_Java_ValidatesRunner_SparkStructuredStreaming.groovy b/.test-infra/jenkins/job_PostCommit_Java_ValidatesRunner_SparkStructuredStreaming.groovy index d20b5484bfa2..14cf815a6752 100644 --- a/.test-infra/jenkins/job_PostCommit_Java_ValidatesRunner_SparkStructuredStreaming.groovy +++ b/.test-infra/jenkins/job_PostCommit_Java_ValidatesRunner_SparkStructuredStreaming.groovy @@ -36,7 +36,7 @@ PostcommitJobBuilder.postCommitJob('beam_PostCommit_Java_ValidatesRunner_SparkSt steps { gradle { rootBuildScriptDir(commonJobProperties.checkoutDir) - tasks(':runners:spark:validatesStructuredStreamingRunnerBatch') + tasks(':runners:spark:2:validatesStructuredStreamingRunnerBatch') commonJobProperties.setGradleSwitches(delegate) } } diff --git a/build.gradle.kts b/build.gradle.kts index 7ab8401954c8..ae4ef5d960e4 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -181,7 +181,7 @@ task("javaHadoopVersionsTest") { dependsOn(":sdks:java:io:hcatalog:hadoopVersionsTest") dependsOn(":sdks:java:io:parquet:hadoopVersionsTest") dependsOn(":sdks:java:extensions:sorter:hadoopVersionsTest") - dependsOn(":runners:spark:hadoopVersionsTest") + dependsOn(":runners:spark:2:hadoopVersionsTest") } task("sqlPostCommit") { diff --git a/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy b/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy index ff0ed9f9795e..15efd9938229 100644 --- a/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy +++ b/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy @@ -1631,7 +1631,7 @@ class BeamModulePlugin implements Plugin { } if (runner?.equalsIgnoreCase('spark')) { - testRuntime it.project(path: ":runners:spark", configuration: 'testRuntime') + testRuntime it.project(path: ":runners:spark:2", configuration: 'testRuntime') testRuntime project.library.java.spark_core testRuntime project.library.java.spark_streaming @@ -2337,7 +2337,7 @@ class BeamModulePlugin implements Plugin { dependsOn = ['installGcpTest'] mustRunAfter = [ ":runners:flink:${project.ext.latestFlinkVersion}:job-server:shadowJar", - ':runners:spark:job-server:shadowJar', + ':runners:spark:2:job-server:shadowJar', ':sdks:python:container:py36:docker', ':sdks:python:container:py37:docker', ':sdks:python:container:py38:docker', @@ -2352,7 +2352,7 @@ class BeamModulePlugin implements Plugin { "--parallelism=2", "--sdk_worker_parallelism=1", "--flink_job_server_jar=${project.project(flinkJobServerProject).shadowJar.archivePath}", - "--spark_job_server_jar=${project.project(':runners:spark:job-server').shadowJar.archivePath}", + "--spark_job_server_jar=${project.project(':runners:spark:2:job-server').shadowJar.archivePath}", ] if (isStreaming) options += [ diff --git a/examples/java/build.gradle b/examples/java/build.gradle index 95d2b48fd221..97af4ad483ce 100644 --- a/examples/java/build.gradle +++ b/examples/java/build.gradle @@ -105,7 +105,7 @@ dependencies { // TODO: Make the netty version used configurable, we add netty-all 4.1.17.Final so it appears on the classpath // before 4.1.8.Final defined by Apache Beam sparkRunnerPreCommit "io.netty:netty-all:4.1.17.Final" - sparkRunnerPreCommit project(":runners:spark") + sparkRunnerPreCommit project(":runners:spark:2") sparkRunnerPreCommit project(":sdks:java:io:hadoop-file-system") sparkRunnerPreCommit library.java.spark_streaming sparkRunnerPreCommit library.java.spark_core diff --git a/examples/kotlin/build.gradle b/examples/kotlin/build.gradle index 825651311f5c..4cf1634e4930 100644 --- a/examples/kotlin/build.gradle +++ b/examples/kotlin/build.gradle @@ -82,7 +82,7 @@ dependencies { // TODO: Make the netty version used configurable, we add netty-all 4.1.17.Final so it appears on the classpath // before 4.1.8.Final defined by Apache Beam sparkRunnerPreCommit "io.netty:netty-all:4.1.17.Final" - sparkRunnerPreCommit project(":runners:spark") + sparkRunnerPreCommit project(":runners:spark:2") sparkRunnerPreCommit project(":sdks:java:io:hadoop-file-system") sparkRunnerPreCommit library.java.spark_streaming sparkRunnerPreCommit library.java.spark_core diff --git a/release/build.gradle.kts b/release/build.gradle.kts index 79135c245cf3..dc08e50cec91 100644 --- a/release/build.gradle.kts +++ b/release/build.gradle.kts @@ -38,7 +38,7 @@ task("runJavaExamplesValidationTask") { description = "Run the Beam quickstart across all Java runners" dependsOn(":runners:direct-java:runQuickstartJavaDirect") dependsOn(":runners:google-cloud-dataflow-java:runQuickstartJavaDataflow") - dependsOn(":runners:spark:runQuickstartJavaSpark") + dependsOn(":runners:spark:2:runQuickstartJavaSpark") dependsOn(":runners:flink:1.10:runQuickstartJavaFlinkLocal") dependsOn(":runners:direct-java:runMobileGamingJavaDirect") dependsOn(":runners:google-cloud-dataflow-java:runMobileGamingJavaDataflow") diff --git a/release/src/main/scripts/run_rc_validation.sh b/release/src/main/scripts/run_rc_validation.sh index 1cb7034a508d..6be50037a11a 100755 --- a/release/src/main/scripts/run_rc_validation.sh +++ b/release/src/main/scripts/run_rc_validation.sh @@ -210,7 +210,7 @@ if [[ "$java_quickstart_spark_local" = true ]]; then echo "*************************************************************" echo "* Running Java Quickstart with Spark local runner" echo "*************************************************************" - ./gradlew :runners:spark:runQuickstartJavaSpark \ + ./gradlew :runners:spark:2:runQuickstartJavaSpark \ -Prepourl=${REPO_URL} \ -Pver=${RELEASE_VER} else diff --git a/sdks/go/test/build.gradle b/sdks/go/test/build.gradle index cd84e3fc22a7..030e460a0c63 100644 --- a/sdks/go/test/build.gradle +++ b/sdks/go/test/build.gradle @@ -97,12 +97,12 @@ task flinkXlangValidatesRunner { // TODO(BEAM-11415): Merge this into existing ValidatesRunner gradle rules. task sparkXlangValidatesRunner { dependsOn ":sdks:go:test:goBuild" - dependsOn ":runners:spark:job-server:shadowJar" + dependsOn ":runners:spark:2:job-server:shadowJar" dependsOn ":sdks:java:testing:expansion-service:buildTestExpansionServiceJar" doLast { def options = [ "--runner spark", - "--spark_job_server_jar ${project(":runners:spark:job-server").shadowJar.archivePath}", + "--spark_job_server_jar ${project(":runners:spark:2:job-server").shadowJar.archivePath}", "--expansion_service_jar ${project(":sdks:java:testing:expansion-service").buildTestExpansionServiceJar.archivePath}", ] exec { @@ -182,12 +182,12 @@ task flinkValidatesRunner { task sparkValidatesRunner { dependsOn ":sdks:go:test:goBuild" - dependsOn ":runners:spark:job-server:shadowJar" + dependsOn ":runners:spark:2:job-server:shadowJar" doLast { def options = [ "--runner spark", "--parallel 1", // prevent memory overuse - "--spark_job_server_jar ${project(":runners:spark:job-server").shadowJar.archivePath}", + "--spark_job_server_jar ${project(":runners:spark:2:job-server").shadowJar.archivePath}", ] exec { executable "sh" diff --git a/sdks/java/io/hadoop-format/build.gradle b/sdks/java/io/hadoop-format/build.gradle index e3886ef755da..4b737493f51d 100644 --- a/sdks/java/io/hadoop-format/build.gradle +++ b/sdks/java/io/hadoop-format/build.gradle @@ -103,7 +103,7 @@ dependencies { delegate.add("sparkRunner", project(path: ":sdks:java:io:hadoop-format", configuration: "testRuntime")) sparkRunner project(path: ":examples:java", configuration: "testRuntime") - sparkRunner project(":runners:spark") + sparkRunner project(":runners:spark:2") sparkRunner project(":sdks:java:io:hadoop-file-system") sparkRunner library.java.spark_streaming sparkRunner library.java.spark_core diff --git a/sdks/java/testing/load-tests/build.gradle b/sdks/java/testing/load-tests/build.gradle index 5c60110bf129..445bdafb0458 100644 --- a/sdks/java/testing/load-tests/build.gradle +++ b/sdks/java/testing/load-tests/build.gradle @@ -39,7 +39,7 @@ def runnerDependency = (project.hasProperty(runnerProperty) ? project.getProperty(runnerProperty) : ":runners:direct-java") -def shouldProvideSpark = ":runners:spark".equals(runnerDependency) +def shouldProvideSpark = ":runners:spark:2".equals(runnerDependency) def isDataflowRunner = ":runners:google-cloud-dataflow-java".equals(runnerDependency) def runnerConfiguration = ":runners:direct-java".equals(runnerDependency) ? "shadow" : null diff --git a/sdks/java/testing/nexmark/build.gradle b/sdks/java/testing/nexmark/build.gradle index 74184c92b68e..494f84565327 100644 --- a/sdks/java/testing/nexmark/build.gradle +++ b/sdks/java/testing/nexmark/build.gradle @@ -36,7 +36,7 @@ def nexmarkProfilingProperty = "nexmark.profile" def nexmarkRunnerProperty = "nexmark.runner" def nexmarkRunnerDependency = project.findProperty(nexmarkRunnerProperty) ?: ":runners:direct-java" -def shouldProvideSpark = ":runners:spark".equals(nexmarkRunnerDependency) +def shouldProvideSpark = ":runners:spark:2".equals(nexmarkRunnerDependency) def isDataflowRunner = ":runners:google-cloud-dataflow-java".equals(nexmarkRunnerDependency) def runnerConfiguration = ":runners:direct-java".equals(nexmarkRunnerDependency) ? "shadow" : null @@ -106,7 +106,7 @@ if (shouldProvideSpark) { // // Parameters: // -Pnexmark.runner -// Specify a runner subproject, such as ":runners:spark" or ":runners:flink:1.10" +// Specify a runner subproject, such as ":runners:spark:2" or ":runners:flink:1.10" // Defaults to ":runners:direct-java" // // -Pnexmark.args diff --git a/sdks/python/apache_beam/runners/portability/spark_runner.py b/sdks/python/apache_beam/runners/portability/spark_runner.py index 6f4162b69812..19fb0d9be144 100644 --- a/sdks/python/apache_beam/runners/portability/spark_runner.py +++ b/sdks/python/apache_beam/runners/portability/spark_runner.py @@ -82,7 +82,7 @@ def path_to_jar(self): self._jar) return self._jar else: - return self.path_to_beam_jar(':runners:spark:job-server:shadowJar') + return self.path_to_beam_jar(':runners:spark:2:job-server:shadowJar') def java_arguments( self, job_port, artifact_port, expansion_port, artifacts_dir): diff --git a/sdks/python/apache_beam/runners/portability/spark_runner_test.py b/sdks/python/apache_beam/runners/portability/spark_runner_test.py index 3473cada687c..1d53dd933eaa 100644 --- a/sdks/python/apache_beam/runners/portability/spark_runner_test.py +++ b/sdks/python/apache_beam/runners/portability/spark_runner_test.py @@ -87,7 +87,7 @@ def parse_options(self, request): self.set_spark_job_server_jar( known_args.spark_job_server_jar or job_server.JavaJarJobServer.path_to_beam_jar( - ':runners:spark:job-server:shadowJar')) + ':runners:spark:2:job-server:shadowJar')) self.environment_type = known_args.environment_type self.environment_options = known_args.environment_options diff --git a/sdks/python/apache_beam/runners/portability/spark_uber_jar_job_server.py b/sdks/python/apache_beam/runners/portability/spark_uber_jar_job_server.py index 6b9e6fd3b2b3..eceb29333ca4 100644 --- a/sdks/python/apache_beam/runners/portability/spark_uber_jar_job_server.py +++ b/sdks/python/apache_beam/runners/portability/spark_uber_jar_job_server.py @@ -77,7 +77,7 @@ def executable_jar(self): url = self._executable_jar else: url = job_server.JavaJarJobServer.path_to_beam_jar( - ':runners:spark:job-server:shadowJar') + ':runners:spark:2:job-server:shadowJar') return job_server.JavaJarJobServer.local_jar(url) def create_beam_job(self, job_id, job_name, pipeline, options): diff --git a/sdks/python/test-suites/portable/common.gradle b/sdks/python/test-suites/portable/common.gradle index cb1b5fffcf10..e0479edc1ec4 100644 --- a/sdks/python/test-suites/portable/common.gradle +++ b/sdks/python/test-suites/portable/common.gradle @@ -121,14 +121,14 @@ task createProcessWorker { def createSparkRunnerTestTask(String workerType) { def taskName = "sparkCompatibilityMatrix${workerType}" - // `project(':runners:spark:job-server').shadowJar.archivePath` is not resolvable until runtime, so hard-code it here. - def jobServerJar = "${rootDir}/runners/spark/job-server/build/libs/beam-runners-spark-job-server-${version}.jar" + // `project(':runners:spark:2:job-server').shadowJar.archivePath` is not resolvable until runtime, so hard-code it here. + def jobServerJar = "${rootDir}/runners/spark/2/job-server/build/libs/beam-runners-spark-2-job-server-${version}.jar" def options = "--spark_job_server_jar=${jobServerJar} --environment_type=${workerType}" if (workerType == 'PROCESS') { options += " --environment_options=process_command=${buildDir.absolutePath}/sdk_worker.sh" } def task = toxTask(taskName, 'spark-runner-test', options) - task.dependsOn ':runners:spark:job-server:shadowJar' + task.dependsOn ':runners:spark:2:job-server:shadowJar' if (workerType == 'DOCKER') { task.dependsOn ext.pythonContainerTask } else if (workerType == 'PROCESS') { @@ -155,7 +155,7 @@ project.task("preCommitPy${pythonVersionSuffix}") { project.task("postCommitPy${pythonVersionSuffix}") { dependsOn = ['setupVirtualenv', "postCommitPy${pythonVersionSuffix}IT", - ':runners:spark:job-server:shadowJar', + ':runners:spark:2:job-server:shadowJar', 'portableLocalRunnerJuliaSetWithSetupPy', 'portableWordCountSparkRunnerBatch'] } @@ -227,7 +227,7 @@ def addTestJavaJarCreator(String runner, Task jobServerJarTask) { // TODO(BEAM-11333) Update and test multiple Flink versions. addTestJavaJarCreator("FlinkRunner", tasks.getByPath(":runners:flink:${latestFlinkVersion}:job-server:shadowJar")) -addTestJavaJarCreator("SparkRunner", tasks.getByPath(":runners:spark:job-server:shadowJar")) +addTestJavaJarCreator("SparkRunner", tasks.getByPath(":runners:spark:2:job-server:shadowJar")) def addTestFlinkUberJar(boolean saveMainSession) { project.tasks.create(name: "testUberJarFlinkRunner${saveMainSession ? 'SaveMainSession' : ''}") { diff --git a/website/www/site/content/en/contribute/release-guide.md b/website/www/site/content/en/contribute/release-guide.md index 8c693d1f8f09..097d4f9c934d 100644 --- a/website/www/site/content/en/contribute/release-guide.md +++ b/website/www/site/content/en/contribute/release-guide.md @@ -909,7 +909,7 @@ _Note_: -Prepourl and -Pver can be found in the RC vote email sent by Release Ma ``` **Spark Local Runner** ``` - ./gradlew :runners:spark:runQuickstartJavaSpark \ + ./gradlew :runners:spark:2:runQuickstartJavaSpark \ -Prepourl=https://repository.apache.org/content/repositories/orgapachebeam-${KEY} \ -Pver=${RELEASE_VERSION} ``` diff --git a/website/www/site/content/en/documentation/runners/spark.md b/website/www/site/content/en/documentation/runners/spark.md index 433ae2dd9238..59c66489011e 100644 --- a/website/www/site/content/en/documentation/runners/spark.md +++ b/website/www/site/content/en/documentation/runners/spark.md @@ -193,7 +193,7 @@ download it on the [Downloads page](/get-started/downloads/). {{< paragraph class="language-py" >}} 1. Start the JobService endpoint: * with Docker (preferred): `docker run --net=host apache/beam_spark_job_server:latest` - * or from Beam source code: `./gradlew :runners:spark:job-server:runShadow` + * or from Beam source code: `./gradlew :runners:spark:2:job-server:runShadow` {{< /paragraph >}} {{< paragraph class="language-py" >}} @@ -228,7 +228,7 @@ For more details on the different deployment modes see: [Standalone](https://spa {{< paragraph class="language-py" >}} 2. Start JobService that will connect with the Spark master: * with Docker (preferred): `docker run --net=host apache/beam_spark_job_server:latest --spark-master-url=spark://localhost:7077` - * or from Beam source code: `./gradlew :runners:spark:job-server:runShadow -PsparkMasterUrl=spark://localhost:7077` + * or from Beam source code: `./gradlew :runners:spark:2:job-server:runShadow -PsparkMasterUrl=spark://localhost:7077` {{< /paragraph >}} {{< paragraph class="language-py" >}}3. Submit the pipeline as above. diff --git a/website/www/site/content/en/documentation/sdks/java/testing/nexmark.md b/website/www/site/content/en/documentation/sdks/java/testing/nexmark.md index 84d9faf59c83..b37c32880114 100644 --- a/website/www/site/content/en/documentation/sdks/java/testing/nexmark.md +++ b/website/www/site/content/en/documentation/sdks/java/testing/nexmark.md @@ -494,7 +494,7 @@ configure logging. Batch Mode: ./gradlew :sdks:java:testing:nexmark:run \ - -Pnexmark.runner=":runners:spark" \ + -Pnexmark.runner=":runners:spark:2" \ -Pnexmark.args=" --runner=SparkRunner --suite=SMOKE @@ -506,7 +506,7 @@ Batch Mode: Streaming Mode: ./gradlew :sdks:java:testing:nexmark:run \ - -Pnexmark.runner=":runners:spark" \ + -Pnexmark.runner=":runners:spark:2" \ -Pnexmark.args=" --runner=SparkRunner --suite=SMOKE