[ZEPPELIN-6091] Drop support for Spark 3.2 #4834

Merged: 6 commits, Sep 19, 2024
Changes from 1 commit
8 changes: 0 additions & 8 deletions .github/workflows/core.yml
@@ -397,14 +397,6 @@ jobs:
- name: Make IRkernel available to Jupyter
run: |
R -e "IRkernel::installspec()"
- name: run spark-3.2 tests with scala-2.12 and python-${{ matrix.python }}
run: |
rm -rf spark/interpreter/metastore_db
./mvnw verify -pl spark-submit,spark/interpreter -am -Dtest=org/apache/zeppelin/spark/* -Pspark-3.2 -Pspark-scala-2.12 -Phadoop3 -Pintegration -DfailIfNoTests=false ${MAVEN_ARGS}
- name: run spark-3.2 tests with scala-2.13 and python-${{ matrix.python }}
run: |
rm -rf spark/interpreter/metastore_db
./mvnw verify -pl spark-submit,spark/interpreter -am -Dtest=org/apache/zeppelin/spark/* -Pspark-3.2 -Pspark-scala-2.13 -Phadoop3 -Pintegration -DfailIfNoTests=false ${MAVEN_ARGS}
- name: run spark-3.3 tests with scala-2.12 and python-${{ matrix.python }}
run: |
rm -rf spark/interpreter/metastore_db
4 changes: 2 additions & 2 deletions Dockerfile
@@ -21,9 +21,9 @@ ENV MAVEN_OPTS="-Xms1024M -Xmx2048M -XX:MaxMetaspaceSize=1024m -XX:-UseGCOverhea
# Allow npm and bower to run with root privileges
RUN echo "unsafe-perm=true" > ~/.npmrc && \
echo '{ "allow_root": true }' > ~/.bowerrc && \
./mvnw -B package -DskipTests -Pbuild-distr -Pspark-3.3 -Pinclude-hadoop -Phadoop3 -Pspark-scala-2.12 -Pweb-classic -Pweb-dist && \
./mvnw -B package -DskipTests -Pbuild-distr -Pspark-3.4 -Pinclude-hadoop -Phadoop3 -Pspark-scala-2.12 -Pweb-classic -Pweb-dist && \
Member Author

@pan3793 pan3793 Sep 17, 2024

Currently, we use Spark 3.4 as the default version; it should be consistent everywhere.

# Example that doesn't compile all interpreters
# ./mvnw -B package -DskipTests -Pbuild-distr -Pspark-3.2 -Pinclude-hadoop -Phadoop3 -Pspark-scala-2.12 -Pweb-classic -Pweb-dist -pl '!groovy,!livy,!hbase,!file,!flink' && \
# ./mvnw -B package -DskipTests -Pbuild-distr -Pspark-3.4 -Pinclude-hadoop -Phadoop3 -Pspark-scala-2.12 -Pweb-classic -Pweb-dist -pl '!groovy,!livy,!hbase,!file,!flink' && \
mv /workspace/zeppelin/zeppelin-distribution/target/zeppelin-*-bin/zeppelin-*-bin /opt/zeppelin/ && \
# Removing stuff saves time, because docker creates a temporary layer
rm -rf ~/.m2 && \
3 changes: 1 addition & 2 deletions docs/setup/basics/how_to_build.md
@@ -83,7 +83,7 @@ You can directly start Zeppelin by running the following command after successfu

Note that the Spark profiles here only affect the unit tests of the Spark interpreter (no need to specify `SPARK_HOME`).
Zeppelin doesn't require you to build with different spark to make different versions of spark work in Zeppelin.
You can run different versions of Spark in Zeppelin as long as you specify `SPARK_HOME`. Actually Zeppelin supports all the versions of Spark from 3.2 to 3.5.
You can run different versions of Spark in Zeppelin as long as you specify `SPARK_HOME`. Actually Zeppelin supports all the versions of Spark from 3.3 to 3.5.

To build with a specific Spark version or scala versions, define one or more of the following profiles and options:

@@ -97,7 +97,6 @@ Available profiles are
-Pspark-3.5
-Pspark-3.4
-Pspark-3.3
-Pspark-3.2
```

minor version can be adjusted by `-Dspark.version=x.x.x`
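
For example, a sketch of a build that pins a specific minor version (the exact version number below is only illustrative):

```bash
# Build with the Spark 3.5 profile while overriding the minor version;
# replace 3.5.2 with whatever release you actually need.
./mvnw clean package -DskipTests -Pspark-3.5 -Dspark.version=3.5.2
```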
55 changes: 21 additions & 34 deletions docs/setup/deployment/flink_and_spark_cluster.md
@@ -42,8 +42,8 @@ Assuming the minimal install, there are several programs that we will need to in

- git
- openssh-server
- OpenJDK 7
- Maven 3.1+
- OpenJDK 11
- Maven

For git, openssh-server, and OpenJDK 11 we will be using the apt package manager.

Expand All @@ -60,17 +60,10 @@ sudo apt-get install git
sudo apt-get install openssh-server
```

##### OpenJDK 7
##### OpenJDK 11

```bash
sudo apt-get install openjdk-7-jdk openjdk-7-jre-lib
```
*A note for those using Ubuntu 16.04*: To install `openjdk-7` on Ubuntu 16.04, one must add a repository. [Source](http://askubuntu.com/questions/761127/ubuntu-16-04-and-openjdk-7)

```bash
sudo add-apt-repository ppa:openjdk-r/ppa
sudo apt-get update
sudo apt-get install openjdk-7-jdk openjdk-7-jre-lib
sudo apt-get install openjdk-11-jdk
```
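
If more than one JDK ends up on the machine, a quick sanity check of the active runtime (a sketch for a standard Ubuntu setup):

```bash
# Confirm that the default java is an OpenJDK 11 runtime;
# if another JDK is selected, switch it interactively.
java -version
sudo update-alternatives --config java
```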

### Installing Zeppelin
@@ -92,26 +85,23 @@ cd zeppelin
Package Zeppelin.

```bash
./mvnw clean package -DskipTests -Pspark-3.2 -Dflink.version=1.1.3 -Pscala-2.11
./mvnw clean package -DskipTests -Pspark-3.5 -Pflink-1.17
```

`-DskipTests` skips the build tests; you're not developing (yet), so you don't need to run them, and the cloned version *should* build.

`-Pspark-3.2` tells maven to build a Zeppelin with Spark 3.2. This is important because Zeppelin has its own Spark interpreter and the versions must be the same.

`-Dflink.version=1.1.3` tells maven specifically to build Zeppelin with Flink version 1.1.3.
`-Pspark-3.5` tells maven to build a Zeppelin with Spark 3.5. This is important because Zeppelin has its own Spark interpreter and the versions must be the same.

-`-Pscala-2.11` tells maven to build with Scala v2.11.
`-Pflink-1.17` tells maven to build a Zeppelin with Flink 1.17.


**Note:** You can build against any version of Spark that has a Zeppelin build profile available. The key is to make sure you check out the matching version of Spark to build. At the time of this writing, Spark 3.2 was the most recent Spark version available.
**Note:** You can build against any version of Spark that has a Zeppelin build profile available. The key is to make sure you check out the matching version of Spark to build. At the time of this writing, Spark 3.5 was the most recent Spark version available.

**Note:** On build failures. Having installed Zeppelin close to 30 times now, I will tell you that sometimes the build fails for seemingly no reason.
As long as you didn't edit any code, it is unlikely the build is failing because of something you did. What does tend to happen is that some dependency maven is trying to download is unreachable. If your build fails on this step, here are some tips:

- Don't get discouraged.
- Scroll up and read through the logs. There will be clues there.
- Retry (that is, run the `./mvnw clean package -DskipTests -Pspark-3.2` again)
- Retry (that is, run the `./mvnw clean package -DskipTests -Pspark-3.5` again)
- If there were clues that a dependency couldn't be downloaded, wait a few hours or even days and retry. When compiling, open source software downloads all of the dependencies it needs; if a server is offline, there is nothing you can do but wait for it to come back.
- Make sure you followed all of the steps carefully.
- Ask the community to help you. Go [here](http://zeppelin.apache.org/community.html) and join the user mailing list. People are there to help you. Make sure to copy and paste the build output (everything that happened in the console) and include that in your message.
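
Once the build succeeds, a minimal smoke test is to start the Zeppelin daemon from the source root (a sketch; the web UI listens on port 8080 by default):

```bash
# Start Zeppelin and confirm the daemon reports it is running.
bin/zeppelin-daemon.sh start
bin/zeppelin-daemon.sh status
```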
@@ -225,7 +215,7 @@ Building from source is recommended where possible, for simplicity in this tuto
To download the Flink binary, use `wget`

```bash
wget "http://mirror.cogentco.com/pub/apache/flink/flink-1.16.2/flink-1.16.2-bin-scala_2.12.tgz"
wget "https://archive.apache.org/dist/flink/flink-1.16.2/flink-1.16.2-bin-scala_2.12.tgz"
tar -xzvf flink-1.16.2-bin-scala_2.12.tgz
```

@@ -243,13 +233,13 @@ If you wish to build Flink from source, the following will be instructive. Note

See the [Flink Installation guide](https://github.com/apache/flink/blob/master/README.md) for more detailed instructions.

Return to the directory where you have been downloading, this tutorial assumes that is `$HOME`. Clone Flink, check out release-1.1.3-rc2, and build.
Return to the directory where you have been downloading, this tutorial assumes that is `$HOME`. Clone Flink, check out release-1.16.2, and build.

```bash
cd $HOME
git clone https://github.com/apache/flink.git
cd flink
git checkout release-1.1.3-rc2
git checkout release-1.16.2
mvn clean install -DskipTests
```

@@ -271,8 +261,8 @@ If no task managers are present, restart the Flink cluster with the following co
(if binaries)

```bash
flink-1.1.3/bin/stop-cluster.sh
flink-1.1.3/bin/start-cluster.sh
flink-1.16.2/bin/stop-cluster.sh
flink-1.16.2/bin/start-cluster.sh
```


@@ -295,34 +285,31 @@ Using binaries is also
To download the Spark binary, use `wget`

```bash
wget "https://dlcdn.apache.org/spark/spark-3.4.1/spark-3.4.1-bin-hadoop3.tgz"
tar -xzvf spark-3.4.1-bin-hadoop3.tgz
mv spark-3.4.1-bin-hadoop3 spark
wget "https://archive.apache.org/dist/spark/spark-3.5.2/spark-3.5.2-bin-hadoop3.tgz"
tar -xzvf spark-3.5.2-bin-hadoop3.tgz
mv spark-3.5.2-bin-hadoop3 spark
```

This will download Spark 3.4.1, compatible with Hadoop 3. You do not have to install Hadoop for this binary to work, but if you are using Hadoop, please change `3` to your appropriate version.
This will download Spark 3.5.2, compatible with Hadoop 3. You do not have to install Hadoop for this binary to work, but if you are using Hadoop, please change `3` to your appropriate version.
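
To have Zeppelin use this Spark distribution, point `SPARK_HOME` at it. A sketch, assuming Zeppelin was cloned into `$HOME/zeppelin` as above (paths are illustrative):

```bash
# Create zeppelin-env.sh from its template if it doesn't exist yet,
# then register the extracted Spark via SPARK_HOME.
cp -n "$HOME/zeppelin/conf/zeppelin-env.sh.template" "$HOME/zeppelin/conf/zeppelin-env.sh"
echo "export SPARK_HOME=$HOME/spark" >> "$HOME/zeppelin/conf/zeppelin-env.sh"
```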

###### Building From source

Spark is an extraordinarily large project, which takes considerable time to download and build. It is also prone to build failures for reasons similar to those listed in the Flink section. If the user wishes to attempt to build from source, this section will provide some reference. If errors are encountered, please contact the Apache Spark community.

See the [Spark Installation](https://github.com/apache/spark/blob/master/README.md) guide for more detailed instructions.

Return to the directory where you have been downloading, this tutorial assumes that is $HOME. Clone Spark, check out branch-1.6, and build.
**Note:** Recall, we're only checking out 1.6 because it is the most recent Spark for which a Zeppelin profile exists at
the time of writing. You are free to check out other version, just make sure you build Zeppelin against the correct version of Spark. However if you use Spark 2.0, the word count example will need to be changed as Spark 2.0 is not compatible with the following examples.

Return to the directory where you have been downloading, this tutorial assumes that is $HOME. Clone Spark, check out branch-3.5, and build.

```bash
cd $HOME
```

Clone, check out, and build Spark version 1.6.x.
Clone, check out, and build Spark version 3.5.x.

```bash
git clone https://github.com/apache/spark.git
cd spark
git checkout branch-1.6
git checkout branch-3.5
mvn clean package -DskipTests
```

35 changes: 0 additions & 35 deletions spark/interpreter/pom.xml
@@ -40,10 +40,6 @@
<maven.aeither.provider.version>3.0.3</maven.aeither.provider.version>
<wagon.version>2.7</wagon.version>

<datanucleus.rdbms.version>4.1.19</datanucleus.rdbms.version>
<datanucleus.apijdo.version>4.2.4</datanucleus.apijdo.version>
<datanucleus.core.version>4.1.17</datanucleus.core.version>

<!-- spark versions -->
<spark.version>3.4.1</spark.version>
<protobuf.version>3.21.12</protobuf.version>
@@ -222,27 +218,6 @@
</dependency>

<!--test libraries-->
<dependency>
<groupId>org.datanucleus</groupId>
<artifactId>datanucleus-core</artifactId>
<version>${datanucleus.core.version}</version>
Member Author

We don't need to manage datanucleus explicitly, as it will be pulled in by spark-hive automatically; all supported Spark versions (3.3~3.5) depend on Hive 2.3.9 and thus on the same versions of datanucleus.

<scope>test</scope>
</dependency>

<dependency>
<groupId>org.datanucleus</groupId>
<artifactId>datanucleus-api-jdo</artifactId>
<version>${datanucleus.apijdo.version}</version>
<scope>test</scope>
</dependency>

<dependency>
<groupId>org.datanucleus</groupId>
<artifactId>datanucleus-rdbms</artifactId>
<version>${datanucleus.rdbms.version}</version>
<scope>test</scope>
</dependency>

<dependency>
<groupId>org.mockito</groupId>
<artifactId>mockito-core</artifactId>
@@ -589,16 +564,6 @@
<py4j.version>0.10.9.5</py4j.version>
</properties>
</profile>

<profile>
<id>spark-3.2</id>
<properties>
<spark.version>3.2.4</spark.version>
<protobuf.version>2.5.0</protobuf.version>
<py4j.version>0.10.9.5</py4j.version>
</properties>
</profile>

</profiles>

</project>
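
For reference, a sketch of how the remaining Spark profiles are exercised, mirroring the commands in `.github/workflows/core.yml` above (the profile names here are illustrative; use any profile that still exists):

```bash
# Run the Spark interpreter tests against one of the remaining profiles.
rm -rf spark/interpreter/metastore_db
./mvnw verify -pl spark-submit,spark/interpreter -am \
  -Dtest=org/apache/zeppelin/spark/* \
  -Pspark-3.5 -Pspark-scala-2.13 -Phadoop3 -Pintegration -DfailIfNoTests=false
```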
@@ -97,7 +97,7 @@ public void setUp() {
when(mockContext.getIntpEventClient()).thenReturn(mockIntpEventClient);

try {
sparkShims = SparkShims.getInstance(SparkVersion.SPARK_3_2_0.toString(), new Properties(), null);
sparkShims = SparkShims.getInstance(SparkVersion.SPARK_3_3_0.toString(), new Properties(), null);
} catch (Throwable e1) {
throw new RuntimeException("All SparkShims are tried, but no one can be created.");
}
@@ -48,14 +48,14 @@ void testSparkVersion() {
assertEquals(SparkVersion.SPARK_3_5_0, SparkVersion.fromVersionString("3.5.0.2.5.0.0-1245"));

// test newer than
assertTrue(SparkVersion.SPARK_3_5_0.newerThan(SparkVersion.SPARK_3_2_0));
assertTrue(SparkVersion.SPARK_3_5_0.newerThan(SparkVersion.SPARK_3_3_0));
assertTrue(SparkVersion.SPARK_3_5_0.newerThanEquals(SparkVersion.SPARK_3_5_0));
assertFalse(SparkVersion.SPARK_3_2_0.newerThan(SparkVersion.SPARK_3_5_0));
assertFalse(SparkVersion.SPARK_3_3_0.newerThan(SparkVersion.SPARK_3_5_0));

// test older than
assertTrue(SparkVersion.SPARK_3_2_0.olderThan(SparkVersion.SPARK_3_5_0));
assertTrue(SparkVersion.SPARK_3_2_0.olderThanEquals(SparkVersion.SPARK_3_2_0));
assertFalse(SparkVersion.SPARK_3_5_0.olderThan(SparkVersion.SPARK_3_2_0));
assertTrue(SparkVersion.SPARK_3_3_0.olderThan(SparkVersion.SPARK_3_5_0));
assertTrue(SparkVersion.SPARK_3_5_0.olderThanEquals(SparkVersion.SPARK_3_5_0));
assertFalse(SparkVersion.SPARK_3_5_0.olderThan(SparkVersion.SPARK_3_3_0));

// test newerThanEqualsPatchVersion
assertTrue(SparkVersion.fromVersionString("2.3.1")
7 changes: 1 addition & 6 deletions spark/pom.xml
@@ -32,12 +32,7 @@
<name>Zeppelin: Spark Parent</name>
<description>Zeppelin Spark Support</description>

<properties>
<datanucleus.rdbms.version>3.2.9</datanucleus.rdbms.version>
<datanucleus.apijdo.version>3.2.6</datanucleus.apijdo.version>
<datanucleus.core.version>3.2.10</datanucleus.core.version>

<!-- spark versions -->
<properties><!-- spark versions -->
<spark.version>3.4.1</spark.version>
<protobuf.version>2.5.0</protobuf.version>
<py4j.version>0.10.9.7</py4j.version>
@@ -25,15 +25,13 @@
public class SparkVersion {
private static final Logger LOGGER = LoggerFactory.getLogger(SparkVersion.class);

public static final SparkVersion SPARK_3_2_0 = SparkVersion.fromVersionString("3.2.0");

public static final SparkVersion SPARK_3_3_0 = SparkVersion.fromVersionString("3.3.0");

public static final SparkVersion SPARK_3_5_0 = SparkVersion.fromVersionString("3.5.0");

public static final SparkVersion SPARK_4_0_0 = SparkVersion.fromVersionString("4.0.0");

public static final SparkVersion MIN_SUPPORTED_VERSION = SPARK_3_2_0;
public static final SparkVersion MIN_SUPPORTED_VERSION = SPARK_3_3_0;
public static final SparkVersion UNSUPPORTED_FUTURE_VERSION = SPARK_4_0_0;

private int version;