From adf7ddc380012d7cb7fe9020c48e4cbc9c83a261 Mon Sep 17 00:00:00 2001 From: pascal omondiagbe Date: Tue, 12 Nov 2024 09:59:32 +1100 Subject: [PATCH] Fix BASE_IMAGE Issue and Optimise Docker Build Time to 8.3m with Precompilation (#47) * Fix BASE_IMAGE not working #35 * Fix: Remove MKL_jll-related warnings and reduce Docker build time - Added explicit installation and precompilation of MKL_jll in the Dockerfile to resolve warnings about missing precompiled modules during container startup. - Precompiled MKL_jll and IntelOpenMP artifacts during the build phase, significantly reducing precompilation time from 18 minutes to 9 minutes by avoiding on-the-fly precompilation. - The time reduction is attributed to precompiling MKL_jll in advance, preventing the heavy processing load that would otherwise occur during runtime. - Added gdal-bin, libgdal-dev, and libfftw3-dev to the Dockerfile to support system dependencies required for geospatial and scientific computations. * Simplify Dockerfile by consolidating Project.toml and Manifest.toml copy commands * Added step to pre-download MKL_jll using version from Manifest.toml and cache it before main package precompilation. 
* Add explicit MKL_jll dependency Seeing if this resolves horrendous build times * Trying out approach to derive dep separately so we can not invalidate cache for base MKL dep Signed-off-by: Peter Baker Co-authored-by: Pascal Omondiagbe Co-authored-by: Takuya Iwanaga Co-authored-by: Peter Baker Co-authored-by: github-actions[bot] --- .github/workflows/PublishDockerImage.yml | 6 ++---- Dockerfile | 27 ++++++++++++++++++------ Project.toml | 2 ++ docs/src/docker.md | 6 ++++++ 4 files changed, 31 insertions(+), 10 deletions(-) diff --git a/.github/workflows/PublishDockerImage.yml b/.github/workflows/PublishDockerImage.yml index 8647b6f..f1455cb 100644 --- a/.github/workflows/PublishDockerImage.yml +++ b/.github/workflows/PublishDockerImage.yml @@ -2,6 +2,7 @@ # It is triggered when a push is made to the main branch. # Additional notes: +# - Checks that MKL_jll dependency has been output before running # - The workflow uses the github.repository context to name the image, ensuring it's tied to your repository # - The GITHUB_TOKEN is automatically provided by GitHub Actions, no need to set it up manually # - The Docker metadata action automatically generates appropriate tags based on the release version @@ -11,16 +12,13 @@ name: Build and Publish ReefGuideAPI.jl Docker Image on: workflow_dispatch: - # push: - # branches: - # - main release: types: [published] env: REGISTRY: ghcr.io IMAGE_NAME: ${{ github.repository }}/reefguide-src - JULIA_VERSION: 1.10.5 + JULIA_VERSION: 1.11 # Set to true to use a fixed ReefGuideAPI version for debugging, false to use the release version USE_FIXED_REEFGUIDEAPI_VERSION: true # TODO this doesn't make sense until the releasing is sorted out diff --git a/Dockerfile b/Dockerfile index 9198731..8a8b98d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,5 +1,5 @@ # See https://hub.docker.com/_/julia for valid versions. 
-ARG JULIA_VERSION="1.10.5" +ARG JULIA_VERSION="1.11.1" #------------------------------------------------------------------------------ # internal-base build target: julia with OS updates and an empty @reefguide @@ -8,9 +8,10 @@ ARG JULIA_VERSION="1.10.5" FROM julia:${JULIA_VERSION}-bookworm AS internal-base # Record the actual base image used from the FROM command as label in the compiled image -ARG BASE_IMAGE=$BASE_IMAGE +ARG BASE_IMAGE="julia:${JULIA_VERSION}-bookworm" LABEL org.opencontainers.image.base.name=${BASE_IMAGE} + # Update all pre-installed OS packages (to get security updates) # and add a few extra utilities RUN --mount=target=/var/lib/apt/lists,type=cache,sharing=locked \ @@ -21,10 +22,14 @@ RUN --mount=target=/var/lib/apt/lists,type=cache,sharing=locked \ git \ less \ nano \ + gdal-bin \ + libgdal-dev \ + libfftw3-dev \ && apt-get clean \ && apt-get autoremove --purge \ && rm -rf /var/lib/apt/lists/* + # Tweak the JULIA_DEPOT_PATH setting so that our shared environments will end up # in a user-agnostic location, not in ~/.julia => /root/.julia which is the default. # See https://docs.julialang.org/en/v1/manual/environment-variables/#JULIA_DEPOT_PATH @@ -101,9 +106,19 @@ ENV JULIA_CPU_TARGET=x86_64;haswell;skylake;skylake-avx512;tigerlake # those to set up the ReefGuideAPI source code as a development package in the # shared @reefguide environment, pre-installing and precompiling dependencies. WORKDIR "${REEFGUIDE_SRC_DIR}" -COPY ./Project.toml ./Project.toml -COPY ./Manifest.toml ./Manifest.toml -RUN julia --project=@reefguide -e 'using Pkg; Pkg.instantiate(verbose=true)' + + +# Copy project and manifest - includes Manifest-v1.11 etc +COPY Project.toml Manifest*.toml ./ + +# Write the MKL_jll version recorded for this project (minus any "+build" suffix) to mkl.dep; julia runs directly, with no echo/pipe, so a failure aborts the build +RUN julia --project=. 
-e 'using Pkg; println(first(split(string(Pkg.dependencies()[Base.UUID("856f044c-d86e-5d09-b602-aeab76dc8ba7")].version), "+")))' > mkl.dep + +# Install and precompile MKL_jll first - this improves build time significantly - unsure exactly why +RUN MKL_VERSION=$(cat mkl.dep) julia -e 'using Pkg; Pkg.add(PackageSpec(name="MKL_jll", version=ENV["MKL_VERSION"])); Pkg.precompile()' + +# Instantiate and precompile the project dependencies in the shared @reefguide environment +RUN julia --project=@reefguide -e 'using Pkg; Pkg.instantiate(); Pkg.precompile()' # Install the ReefGuideAPI source code and configure it as a development # package in the @reefguide shared environment. @@ -121,4 +136,4 @@ EXPOSE 8000 # Run Julia commands by default as the container launches. # Derived applications should override the command. -ENTRYPOINT ["julia", "--project=@reefguide", "-t", "auto,1", "-e", "using ReefGuideAPI; ReefGuideAPI.start_server(\"/data/reefguide/config.toml\")"] +ENTRYPOINT ["julia", "--project=@reefguide", "-t", "auto,1", "-e", "using ReefGuideAPI; ReefGuideAPI.start_server(\"/data/reefguide/config.toml\")"] \ No newline at end of file diff --git a/Project.toml b/Project.toml index 1c28e9e..0cc9ffd 100644 --- a/Project.toml +++ b/Project.toml @@ -28,6 +28,7 @@ JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" JSONWebTokens = "9b8beb19-0777-58c6-920b-28f749fee4d3" LibGEOS = "a90b1aa1-3769-5649-ba7e-abc5a9d163eb" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" +MKL_jll = "856f044c-d86e-5d09-b602-aeab76dc8ba7" Memoization = "6fafb56a-5788-4b4e-91ca-c0cea6611c73" Mmap = "a63ad114-7e13-5084-954f-fe012c677804" NetCDF = "30363a11-5582-574a-97bb-aa9a979735b9" @@ -48,3 +49,4 @@ YAXArrays = "c21b50f5-aa40-41ea-b809-c0f5e47bfa5c" [compat] DiskArrays = "=0.3.23, ^0.4.5" +MKL_jll = "2024.2.0" diff --git a/docs/src/docker.md b/docs/src/docker.md index 8c277eb..63f1e0f 100644 --- a/docs/src/docker.md +++ b/docs/src/docker.md @@ -5,6 +5,12 @@ means you can run an instance of the ReefGuideAPI.jl package without needing to 
compile/build it with a local `Julia` installation. You will be able to view the latest published versions of the Docker image on the repository packages page. +## A note about MKL_jll + +Due to how Julia (particularly v1.11) handles precompilation, explicitly installing MKL_jll before installing any of the explicit project dependencies significantly reduces the build time. + +For this reason, the Dockerfile extracts the MKL_jll version from the Manifest file using `Pkg.dependencies()`, precompiles it in an anonymous project, then compiles the main dependencies. This cuts the build time from around 15 minutes down to around 6-7 minutes. + ## Mounting files and required data As mentioned in [Getting Started](@ref getting_started), the `ReefGuideAPI.jl` package currently requires