From 2a7977153a392a905a4dd6c3613a2f8b3383884c Mon Sep 17 00:00:00 2001
From: "harsh.takkar"
Date: Wed, 30 Mar 2022 17:14:17 +0530
Subject: [PATCH 1/4] feat(MVA): added Dockerfile, Jenkinsfile, and .VERSION
 file; updated pom.xml

---
 .VERSION    |   1 +
 Dockerfile  |  29 ++++++++++++++
 Jenkinsfile | 110 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 pom.xml     |  54 +++++---------------------
 4 files changed, 150 insertions(+), 44 deletions(-)
 create mode 100644 .VERSION
 create mode 100644 Dockerfile
 create mode 100644 Jenkinsfile

diff --git a/.VERSION b/.VERSION
new file mode 100644
index 0000000000000..9cfad111ef6aa
--- /dev/null
+++ b/.VERSION
@@ -0,0 +1 @@
+3.2.1,1.0
\ No newline at end of file
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000000000..451c3453f8868
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,29 @@
+ARG spark_image_tag=3.2.1-hadoop3.3-1.3
+
+FROM artifacts.ggn.in.guavus.com:4244/spark:${spark_image_tag}
+
+ARG spark_uid=185
+
+USER root
+
+RUN apt-get update -y && \
+    apt-get -y install curl && \
+    curl -fSL https://repo1.maven.org/maven2/com/amazonaws/aws-java-sdk-s3/1.11.901/aws-java-sdk-s3-1.11.901.jar -o /opt/spark/jars/aws-java-sdk-s3-1.11.901.jar && \
+    curl -fSL https://repo1.maven.org/maven2/com/amazonaws/aws-java-sdk/1.11.901/aws-java-sdk-1.11.901.jar -o /opt/spark/jars/aws-java-sdk-1.11.901.jar && \
+    curl -fSL https://repo1.maven.org/maven2/com/amazonaws/aws-java-sdk-core/1.11.901/aws-java-sdk-core-1.11.901.jar -o /opt/spark/jars/aws-java-sdk-core-1.11.901.jar && \
+    curl -fSL https://repo1.maven.org/maven2/com/amazonaws/aws-java-sdk-dynamodb/1.11.901/aws-java-sdk-dynamodb-1.11.901.jar -o /opt/spark/jars/aws-java-sdk-dynamodb-1.11.901.jar && \
+    curl -fSL https://repo1.maven.org/maven2/org/apache/commons/commons-pool2/2.11.1/commons-pool2-2.11.1.jar -o /opt/spark/jars/commons-pool2-2.11.1.jar && \
+    curl -fSL https://repo1.maven.org/maven2/org/apache/hadoop/hadoop-aws/3.3.1/hadoop-aws-3.3.1.jar -o /opt/spark/jars/hadoop-aws-3.3.1.jar && \
+    curl -fSL https://repo1.maven.org/maven2/org/apache/spark/spark-avro_2.12/3.2.1/spark-avro_2.12-3.2.1.jar -o /opt/spark/jars/spark-avro_2.12-3.2.1.jar && \
+    curl -fSL https://repo1.maven.org/maven2/org/apache/spark/spark-token-provider-kafka-0-10_2.12/3.2.1/spark-token-provider-kafka-0-10_2.12-3.2.1.jar -o /opt/spark/jars/spark-token-provider-kafka-0-10_2.12-3.2.1.jar && \
+    curl -fSL https://repo1.maven.org/maven2/net/java/dev/jets3t/jets3t/0.9.0/jets3t-0.9.0.jar -o /opt/spark/jars/jets3t-0.9.0.jar && \
+    curl -fSL https://repo1.maven.org/maven2/org/apache/spark/spark-sql-kafka-0-10_2.12/3.2.1/spark-sql-kafka-0-10_2.12-3.2.1.jar -o /opt/spark/jars/spark-sql-kafka-0-10_2.12-3.2.1.jar && \
+    curl -fSL https://repo1.maven.org/maven2/org/apache/kafka/kafka-clients/3.0.0/kafka-clients-3.0.0.jar -o /opt/spark/jars/kafka-clients-3.0.0.jar && \
+    curl -fSL https://repo1.maven.org/maven2/org/apache/spark/spark-streaming-kafka-0-10_2.12/3.2.1/spark-streaming-kafka-0-10_2.12-3.2.1.jar -o /opt/spark/jars/spark-streaming-kafka-0-10_2.12-3.2.1.jar && \
+    curl -fSL https://repo1.maven.org/maven2/io/prometheus/jmx/jmx_prometheus_javaagent/0.13.0/jmx_prometheus_javaagent-0.13.0.jar -o /opt/spark/jars/jmx_prometheus_javaagent-0.13.0.jar
+
+
+ENTRYPOINT [ "/opt/entrypoint.sh" ]
+
+# Specify the user that the actual main process will run as
+USER ${spark_uid}
\ No newline at end of file
diff --git a/Jenkinsfile b/Jenkinsfile
new file mode 100644
index 0000000000000..28454f03c2cf0
--- /dev/null
+++ b/Jenkinsfile
@@ -0,0 +1,110 @@
+@Library('jenkins_lib')_
+pipeline
+ {
+   agent { label 'slave' }
+
+   environment {
+     project = "apache-spark";
+     buildNum = currentBuild.getNumber();
+     // branch prefix, e.g. feat, release, fix
+     buildType = BRANCH_NAME.split("/").first();
+     // branch suffix, e.g. OP-
+     branchVersion = BRANCH_NAME.split("/").last().toUpperCase();
+     // Define global environment variables in this section
+   }
+
+   stages {
+     stage("Define Release version") {
+       steps {
+         script {
+           // Global lib step for environment version definition
+           versionDefine()
+           env.GUAVUS_SPARK_VERSION = "${VERSION}".split(",").first();
+           env.GUAVUS_DOCKER_VERSION = "${VERSION}".split(",").last();
+           env.dockerTag = "${GUAVUS_SPARK_VERSION}-hadoop3.3-${GUAVUS_DOCKER_VERSION}-${RELEASE}"
+           echo "GUAVUS_SPARK_VERSION : ${GUAVUS_SPARK_VERSION}"
+           echo "GUAVUS_DOCKER_VERSION : ${GUAVUS_DOCKER_VERSION}"
+           echo "DOCKER TAG : ${dockerTag}"
+         }
+       }
+     }
+
+     stage("Versioning") {
+       steps {
+         echo "GUAVUS_SPARK_VERSION : ${GUAVUS_SPARK_VERSION}"
+         echo "GUAVUS_DOCKER_VERSION : ${GUAVUS_DOCKER_VERSION}"
+         sh 'mvn versions:set -DnewVersion=${GUAVUS_SPARK_VERSION}'
+       }
+     }
+
+     stage("Initialize Variable") {
+       steps {
+         script {
+           PUSH_JAR = false;
+           PUSH_DOCKER = false;
+           DOCKER_IMAGE_NAME = "spark-opsiq";
+           PYSPARK_DOCKER_IMAGE_NAME = "spark-py-opsiq";
+           longCommit = sh(returnStdout: true, script: "git rev-parse HEAD").trim()
+
+           if ( env.buildType in ['release'] ) {
+             PUSH_JAR = false; // enable it if we have changes in code
+             PUSH_DOCKER = true;
+           }
+           else if ( env.buildType ==~ /PR-.*/ ) {
+             PUSH_DOCKER = true;
+           }
+         }
+       }
+     }
+
+     stage("Push JAR to Maven Artifactory") {
+       when {
+         expression { PUSH_JAR == true }
+       }
+       steps {
+         script {
+           echo "Pushing JAR to Maven Artifactory"
+           sh "mvn deploy -U -Dcheckstyle.skip=true -Denforcer.skip=true -DskipTests=true;"
+         }
+       }
+     }
+
+     stage("Build and Push Docker") {
+       when {
+         expression { PUSH_DOCKER == true }
+       }
+       stages {
+         stage("Create Docker Image") {
+           steps {
+             script {
+               echo "Creating docker build..."
+               sh "./dev/make-distribution.sh --name guavus_spark-${GUAVUS_SPARK_VERSION}-3.3 -Phive -Phive-thriftserver -Pkubernetes -Phadoop-3.3 -Dhadoop.version=3.3.1"
+               sh "./dist/bin/docker-image-tool.sh -r artifacts.ggn.in.guavus.com:4244 -t ${GUAVUS_SPARK_VERSION}-hadoop3.3-${GUAVUS_DOCKER_VERSION} build"
+               sh "./dist/bin/docker-image-tool.sh -r artifacts.ggn.in.guavus.com:4244 -t ${GUAVUS_SPARK_VERSION}-hadoop3.3-${GUAVUS_DOCKER_VERSION} push"
+               sh "./dist/bin/docker-image-tool.sh -r artifacts.ggn.in.guavus.com:4244 -t ${GUAVUS_SPARK_VERSION}-hadoop3.3-${GUAVUS_DOCKER_VERSION} -p ./resource-managers/kubernetes/docker/src/main/dockerfiles/spark/bindings/python/Dockerfile build"
+               sh "./dist/bin/docker-image-tool.sh -r artifacts.ggn.in.guavus.com:4244 -t ${GUAVUS_SPARK_VERSION}-hadoop3.3-${GUAVUS_DOCKER_VERSION} -p ./resource-managers/kubernetes/docker/src/main/dockerfiles/spark/bindings/python/Dockerfile push"
+               sh "docker build -t ${DOCKER_IMAGE_NAME} --build-arg GIT_HEAD=${longCommit} --build-arg GIT_BRANCH=${env.BRANCH_NAME} --build-arg VERSION=${dockerTag} --build-arg BUILD_NUMBER=${env.BUILD_NUMBER} ."
+               sh "docker build -f PysparkDockerfile -t ${PYSPARK_DOCKER_IMAGE_NAME} --build-arg GIT_HEAD=${longCommit} --build-arg GIT_BRANCH=${env.BRANCH_NAME} --build-arg VERSION=${dockerTag} --build-arg BUILD_NUMBER=${env.BUILD_NUMBER} ."
+             }
+           }
+         }
+
+         stage("PUSH Docker") {
+           steps {
+             script {
+               echo "Docker PUSH..."
+               docker_push( buildType, DOCKER_IMAGE_NAME )
+               docker_push( buildType, PYSPARK_DOCKER_IMAGE_NAME )
+             }
+           }
+         }
+       }
+     }
+   }
+ }
diff --git a/pom.xml b/pom.xml
index 183d2588402b1..1ec8410fd27ec 100644
--- a/pom.xml
+++ b/pom.xml
@@ -37,50 +37,16 @@
       <distribution>repo</distribution>
     </license>
   </licenses>
 
-  <scm>
-    <connection>scm:git:git@github.com:apache/spark.git</connection>
-    <developerConnection>scm:git:https://gitbox.apache.org/repos/asf/spark.git</developerConnection>
-    <url>scm:git:git@github.com:apache/spark.git</url>
-    <tag>HEAD</tag>
-  </scm>
-  <developers>
-    <developer>
-      <id>matei</id>
-      <name>Matei Zaharia</name>
-      <email>matei.zaharia@gmail.com</email>
-      <url>https://cs.stanford.edu/people/matei</url>
-      <organization>Apache Software Foundation</organization>
-      <organizationUrl>http://spark.apache.org</organizationUrl>
-    </developer>
-  </developers>
-  <issueManagement>
-    <system>JIRA</system>
-    <url>https://issues.apache.org/jira/browse/SPARK</url>
-  </issueManagement>
-
-  <mailingLists>
-    <mailingList>
-      <name>Dev Mailing List</name>
-      <post>dev@spark.apache.org</post>
-      <subscribe>dev-subscribe@spark.apache.org</subscribe>
-      <unsubscribe>dev-unsubscribe@spark.apache.org</unsubscribe>
-    </mailingList>
-    <mailingList>
-      <name>User Mailing List</name>
-      <post>user@spark.apache.org</post>
-      <subscribe>user-subscribe@spark.apache.org</subscribe>
-      <unsubscribe>user-unsubscribe@spark.apache.org</unsubscribe>
-    </mailingList>
-    <mailingList>
-      <name>Commits Mailing List</name>
-      <post>commits@spark.apache.org</post>
-      <subscribe>commits-subscribe@spark.apache.org</subscribe>
-      <unsubscribe>commits-unsubscribe@spark.apache.org</unsubscribe>
-    </mailingList>
-  </mailingLists>
+  <distributionManagement>
+    <repository>
+      <id>central</id>
+      <url>http://artifacts.ggn.in.guavus.com/libs-release-local</url>
+    </repository>
+    <snapshotRepository>
+      <id>snapshots</id>
+      <url>http://artifacts.ggn.in.guavus.com/libs-snapshot-local</url>
+    </snapshotRepository>
+  </distributionManagement>
 
   <modules>
     <module>common/sketch</module>
     <module>common/kvstore</module>

From a928879c08dd5aa5e015aadc4ede58dde6947db5 Mon Sep 17 00:00:00 2001
From: "harsh.takkar"
Date: Wed, 30 Mar 2022 17:57:16 +0530
Subject: [PATCH 2/4] feat(MVA): updated Dockerfile

---
 Dockerfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Dockerfile b/Dockerfile
index 451c3453f8868..04365345da845 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,4 +1,4 @@
-ARG spark_image_tag=3.2.1-hadoop3.3-1.3
+ARG spark_image_tag=3.2.1-hadoop3.3-1.0
 
 FROM artifacts.ggn.in.guavus.com:4244/spark:${spark_image_tag}
 

From 39fc19c96ca1502cffc0623fd74a88eef5078c53 Mon Sep 17 00:00:00 2001
From: "harsh.takkar"
Date: Wed, 30 Mar 2022 18:26:59 +0530
Subject: [PATCH 3/4] feat(MVA): updated stack size for catalyst build

---
 sql/catalyst/pom.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sql/catalyst/pom.xml b/sql/catalyst/pom.xml
index 8f942665ef54d..5cc41b53fb3cc 100644
--- a/sql/catalyst/pom.xml
+++ b/sql/catalyst/pom.xml
@@ -162,7 +162,7 @@
       <plugin>
         <groupId>org.scalatest</groupId>
         <artifactId>scalatest-maven-plugin</artifactId>
         <configuration>
-          <argLine>-ea -Xmx4g -Xss4m -XX:ReservedCodeCacheSize=${CodeCacheSize} -Dio.netty.tryReflectionSetAccessible=true</argLine>
+          <argLine>-ea -Xmx4g -Xss16m -XX:ReservedCodeCacheSize=${CodeCacheSize} -Dio.netty.tryReflectionSetAccessible=true</argLine>
         </configuration>
       </plugin>

From c84b6b7fba18463153496d4fd0e73b8c6a6d59ad Mon Sep 17 00:00:00 2001
From: "harsh.takkar"
Date: Wed, 30 Mar 2022 18:54:52 +0530
Subject: [PATCH 4/4] feat(MVA): added PysparkDockerfile

---
 PysparkDockerfile | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)
 create mode 100644 PysparkDockerfile

diff --git a/PysparkDockerfile b/PysparkDockerfile
new file mode 100644
index 0000000000000..284f2bb131990
--- /dev/null
+++ b/PysparkDockerfile
@@ -0,0 +1,29 @@
+ARG spark_image_tag=3.2.1-hadoop3.3-1.0
+
+FROM artifacts.ggn.in.guavus.com:4244/spark-py:${spark_image_tag}
+
+ARG spark_uid=185
+
+USER root
+
+RUN apt-get update -y && \
+    apt-get -y install curl && \
+    curl -fSL https://repo1.maven.org/maven2/com/amazonaws/aws-java-sdk-s3/1.11.901/aws-java-sdk-s3-1.11.901.jar -o /opt/spark/jars/aws-java-sdk-s3-1.11.901.jar && \
+    curl -fSL https://repo1.maven.org/maven2/com/amazonaws/aws-java-sdk/1.11.901/aws-java-sdk-1.11.901.jar -o /opt/spark/jars/aws-java-sdk-1.11.901.jar && \
+    curl -fSL https://repo1.maven.org/maven2/com/amazonaws/aws-java-sdk-core/1.11.901/aws-java-sdk-core-1.11.901.jar -o /opt/spark/jars/aws-java-sdk-core-1.11.901.jar && \
+    curl -fSL https://repo1.maven.org/maven2/com/amazonaws/aws-java-sdk-dynamodb/1.11.901/aws-java-sdk-dynamodb-1.11.901.jar -o /opt/spark/jars/aws-java-sdk-dynamodb-1.11.901.jar && \
+    curl -fSL https://repo1.maven.org/maven2/org/apache/commons/commons-pool2/2.11.1/commons-pool2-2.11.1.jar -o /opt/spark/jars/commons-pool2-2.11.1.jar && \
+    curl -fSL https://repo1.maven.org/maven2/org/apache/hadoop/hadoop-aws/3.3.1/hadoop-aws-3.3.1.jar -o /opt/spark/jars/hadoop-aws-3.3.1.jar && \
+    curl -fSL https://repo1.maven.org/maven2/org/apache/spark/spark-avro_2.12/3.2.1/spark-avro_2.12-3.2.1.jar -o /opt/spark/jars/spark-avro_2.12-3.2.1.jar && \
+    curl -fSL https://repo1.maven.org/maven2/org/apache/spark/spark-token-provider-kafka-0-10_2.12/3.2.1/spark-token-provider-kafka-0-10_2.12-3.2.1.jar -o /opt/spark/jars/spark-token-provider-kafka-0-10_2.12-3.2.1.jar && \
+    curl -fSL https://repo1.maven.org/maven2/net/java/dev/jets3t/jets3t/0.9.0/jets3t-0.9.0.jar -o /opt/spark/jars/jets3t-0.9.0.jar && \
+    curl -fSL https://repo1.maven.org/maven2/org/apache/spark/spark-sql-kafka-0-10_2.12/3.2.1/spark-sql-kafka-0-10_2.12-3.2.1.jar -o /opt/spark/jars/spark-sql-kafka-0-10_2.12-3.2.1.jar && \
+    curl -fSL https://repo1.maven.org/maven2/org/apache/kafka/kafka-clients/3.0.0/kafka-clients-3.0.0.jar -o /opt/spark/jars/kafka-clients-3.0.0.jar && \
+    curl -fSL https://repo1.maven.org/maven2/org/apache/spark/spark-streaming-kafka-0-10_2.12/3.2.1/spark-streaming-kafka-0-10_2.12-3.2.1.jar -o /opt/spark/jars/spark-streaming-kafka-0-10_2.12-3.2.1.jar && \
+    curl -fSL https://repo1.maven.org/maven2/io/prometheus/jmx/jmx_prometheus_javaagent/0.13.0/jmx_prometheus_javaagent-0.13.0.jar -o /opt/spark/jars/jmx_prometheus_javaagent-0.13.0.jar
+
+
+ENTRYPOINT [ "/opt/entrypoint.sh" ]
+
+# Specify the user that the actual main process will run as
+USER ${spark_uid}
\ No newline at end of file
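
Note on the versioning scheme: versionDefine() and docker_push() come from the shared jenkins_lib and are not part of this series, so their exact behaviour is assumed here. As a minimal sketch, not the library implementation, the comma-separated .VERSION file added in PATCH 1/4 is expected to drive the Jenkinsfile tag logic roughly like this, mirroring its split(",") calls (readFile is a standard Jenkins Pipeline step; the RELEASE suffix appended by the real pipeline is omitted because it is produced inside jenkins_lib and not visible in this series):

    // Hypothetical sketch of the .VERSION -> docker tag mapping.
    def version       = readFile('.VERSION').trim()   // "3.2.1,1.0"
    def sparkVersion  = version.split(',').first()    // "3.2.1" -> GUAVUS_SPARK_VERSION
    def dockerVersion = version.split(',').last()     // "1.0"   -> GUAVUS_DOCKER_VERSION
    def dockerTag     = "${sparkVersion}-hadoop3.3-${dockerVersion}"
    echo "DOCKER TAG : ${dockerTag}"                  // "3.2.1-hadoop3.3-1.0"

The resulting tag matches what docker-image-tool.sh pushes and what the FROM lines in Dockerfile and PysparkDockerfile consume, which is why PATCH 2/4 aligns the default spark_image_tag with the 1.0 docker version recorded in .VERSION.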