diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 000000000..332acff44
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,18 @@
+.git
+.github
+.idea
+bin
+conf
+docs/build
+docs/temp
+docs/venv
+metastore_db
+target
+common/target
+spark-integration/target
+fuzz-testing/target
+spark/target
+native/target
+core/target
+spark-warehouse
+venv
diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml
new file mode 100644
index 000000000..daa6db324
--- /dev/null
+++ b/.github/workflows/docker-publish.yml
@@ -0,0 +1,77 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Builds the Comet Docker image from kube/Dockerfile and publishes it to the
+# GitHub Container Registry whenever a release-style tag is pushed.
+name: Publish Docker images
+
+concurrency:
+  group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }}
+  cancel-in-progress: true
+
+on:
+  push:
+    tags:
+      - '*.*.*'
+      - '*.*.*-rc*'
+      - 'test-docker-publish-*'
+
+jobs:
+  docker:
+    name: Docker
+    runs-on: ubuntu-22.04
+    permissions:
+      contents: read
+      # required so that GITHUB_TOKEN can push to ghcr.io
+      packages: write
+    steps:
+      # the checkout provides pom.xml, which the version extraction step reads
+      - name: Check out repository
+        uses: actions/checkout@v4
+      - name: Set up Java
+        uses: actions/setup-java@v3
+        with:
+          # setup-java v2+ fails without an explicit distribution
+          distribution: 'temurin'
+          java-version: '17'
+      - name: Extract Comet version
+        id: extract_version
+        run: |
+          COMET_VERSION=$(mvn help:evaluate -Dexpression=project.version -q -DforceStdout)
+          echo "COMET_VERSION=$COMET_VERSION" >> "$GITHUB_ENV"
+      - name: Echo Comet version
+        run: echo "The current Comet version is ${{ env.COMET_VERSION }}"
+      # QEMU lets the amd64 runner execute the RUN steps of the linux/arm64 build
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@v3
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+      - name: Login to GitHub Container Registry
+        uses: docker/login-action@v3
+        with:
+          registry: ghcr.io
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+      - name: Build and push
+        uses: docker/build-push-action@v6
+        with:
+          platforms: linux/amd64,linux/arm64
+          push: true
+          # the ghcr.io prefix routes the push to the registry logged in to above;
+          # an unprefixed name would be pushed to Docker Hub
+          tags: ghcr.io/apache/datafusion-comet:spark-3.4-scala-2.12-${{ env.COMET_VERSION }}
+          file: kube/Dockerfile
diff --git a/docs/source/user-guide/installation.md b/docs/source/user-guide/installation.md
index 45f988e29..07fc689c5 100644
--- a/docs/source/user-guide/installation.md
+++ b/docs/source/user-guide/installation.md
@@ -32,7 +32,11 @@ Make sure the following requirements are met and software installed on your mach
 - JDK 8 and up
 - GLIBC 2.17 (Centos 7) and up
 
-## Using a Published Binary Release
+## Using a Published Docker Image
+
+Docker images are available at https://github.com/orgs/apache/packages?repo_name=datafusion-comet
+
+## Using a Published JAR File
 
 There are no published binary releases yet.
 
diff --git a/kube/Dockerfile b/kube/Dockerfile
index d6244c113..4e15794ab 100644
--- a/kube/Dockerfile
+++ b/kube/Dockerfile
@@ -21,7 +21,6 @@ USER root
 
 # Installing JDK11 as the image comes with JRE
 RUN apt update \
-    && apt install -y git \
     && apt install -y curl \
     && apt install -y openjdk-11-jdk \
     && apt clean
@@ -32,14 +31,38 @@ ENV RUSTFLAGS="-C debuginfo=line-tables-only -C incremental=false"
 ENV SPARK_VERSION=3.4
 ENV SCALA_VERSION=2.12
 
+# copy source files to Docker image
+RUN mkdir /comet
+WORKDIR /comet
+
+# build native code first so that this layer can be re-used
+# if only Scala code gets modified
+COPY rust-toolchain.toml /comet/rust-toolchain.toml
+COPY native /comet/native
+RUN cd native && RUSTFLAGS="-Ctarget-cpu=native" cargo build --release
+
+# copy the rest of the project
+COPY .mvn /comet/.mvn
+COPY mvnw /comet/mvnw
+COPY common /comet/common
+COPY dev /comet/dev
+COPY docs /comet/docs
+COPY fuzz-testing /comet/fuzz-testing
+COPY spark /comet/spark
+COPY spark-integration /comet/spark-integration
+COPY scalafmt.conf /comet/scalafmt.conf
+COPY .scalafix.conf /comet/.scalafix.conf
+COPY Makefile /comet/Makefile
+COPY pom.xml /comet/pom.xml
+
 # Pick the JDK instead of JRE to compile Comet
-RUN cd /opt \
-    && git clone https://github.com/apache/datafusion-comet.git \
-    && cd datafusion-comet \
-    && JAVA_HOME=$(readlink -f $(which javac) | sed "s/\/bin\/javac//") make release PROFILES="-Pspark-$SPARK_VERSION -Pscala-$SCALA_VERSION"
+RUN cd /comet \
+    && JAVA_HOME=$(readlink -f $(which javac) | sed "s/\/bin\/javac//") make release-nogit PROFILES="-Pspark-$SPARK_VERSION -Pscala-$SCALA_VERSION"
 
 FROM apache/spark:3.4.2
 ENV SPARK_VERSION=3.4
 ENV SCALA_VERSION=2.12
 USER root
-COPY --from=builder /opt/datafusion-comet/spark/target/comet-spark-spark${SPARK_VERSION}_$SCALA_VERSION-0.1.0-SNAPSHOT.jar $SPARK_HOME/jars
\ No newline at end of file
+
+# note the use of a wildcard in the file name so that this works with both snapshot and final release versions
+COPY --from=builder /comet/spark/target/comet-spark-spark${SPARK_VERSION}_$SCALA_VERSION-0.2.0*.jar $SPARK_HOME/jars \ No newline at end of file diff --git a/native/Cargo.toml b/native/Cargo.toml index 9977ceece..2e73c5445 100644 --- a/native/Cargo.toml +++ b/native/Cargo.toml @@ -39,15 +39,15 @@ arrow-buffer = { version = "52.2.0" } arrow-data = { version = "52.2.0" } arrow-schema = { version = "52.2.0" } parquet = { version = "52.2.0", default-features = false, features = ["experimental"] } -datafusion-common = { git = "https://github.com/apache/datafusion.git", rev = "41.0.0-rc1" } -datafusion = { default-features = false, git = "https://github.com/apache/datafusion.git", rev = "41.0.0-rc1", features = ["unicode_expressions", "crypto_expressions"] } -datafusion-functions = { git = "https://github.com/apache/datafusion.git", rev = "41.0.0-rc1", features = ["crypto_expressions"] } -datafusion-functions-nested = { git = "https://github.com/apache/datafusion.git", rev = "41.0.0-rc1", default-features = false } -datafusion-expr = { git = "https://github.com/apache/datafusion.git", rev = "41.0.0-rc1", default-features = false } -datafusion-execution = { git = "https://github.com/apache/datafusion.git", rev = "41.0.0-rc1", default-features = false } -datafusion-physical-plan = { git = "https://github.com/apache/datafusion.git", rev = "41.0.0-rc1", default-features = false } -datafusion-physical-expr-common = { git = "https://github.com/apache/datafusion.git", rev = "41.0.0-rc1", default-features = false } -datafusion-physical-expr = { git = "https://github.com/apache/datafusion.git", rev = "41.0.0-rc1", default-features = false } +datafusion-common = { version = "41.0.0" } +datafusion = { default-features = false, version = "41.0.0", features = ["unicode_expressions", "crypto_expressions"] } +datafusion-functions = { version = "41.0.0", features = ["crypto_expressions"] } +datafusion-functions-nested = { version = "41.0.0", default-features = false } +datafusion-expr 
= { version = "41.0.0", default-features = false } +datafusion-execution = { version = "41.0.0", default-features = false } +datafusion-physical-plan = { version = "41.0.0", default-features = false } +datafusion-physical-expr-common = { version = "41.0.0", default-features = false } +datafusion-physical-expr = { version = "41.0.0", default-features = false } datafusion-comet-spark-expr = { path = "spark-expr", version = "0.2.0" } datafusion-comet-proto = { path = "proto", version = "0.2.0" } chrono = { version = "0.4", default-features = false, features = ["clock"] } diff --git a/pom.xml b/pom.xml index 311437cc9..d41a57dbc 100644 --- a/pom.xml +++ b/pom.xml @@ -588,6 +588,10 @@ under the License. + + scala-2.12 + + scala-2.13 @@ -938,6 +942,7 @@ under the License. **/build/** **/target/** **/apache-spark/** + .dockerignore .git/** .github/** .gitignore @@ -963,7 +968,7 @@ under the License. docs/source/_static/images/** dev/release/rat_exclude_files.txt dev/release/requirements.txt - native/core/src/execution/generated/** + native/proto/src/generated/**