From 4956b2b6424b75209a8b4dd3e5a14c701f5cd05f Mon Sep 17 00:00:00 2001 From: Anton Agestam Date: Sun, 9 Apr 2023 16:31:36 +0200 Subject: [PATCH] Optimize container build time - Use cache mounts for pip and apt, heavily reducing network requests when the cache is warm. - Remove git as a build-time dependency. Besides switching to archive endpoints, as was done in 39b4f08, this also requires either setting `KARAPACE_VERSION` as a container build arg or building karapace/version.py independently. - Add hadolint pre-commit check for linting the Dockerfile. - Move to using Python base images for the builder and final stages. This allows omitting installation of some build tools. It also allows moving to a more recent Python version, no longer being bound by what's in distro repositories. Wheel availability of some of our Python dependencies prevents us from moving to 3.11 for now. - Change installation approach to construct a virtualenv in the builder step, and copy it unaltered to the final stage, with dependencies and Karapace itself installed in it. This allows having even fewer layers in the final stage, and is simpler. - Introduce a _much_ stricter .dockerignore, ignoring files by default and explicitly including what's required. This makes sure changes in unrelated files do not evict the layer cache.
For example, a few files that previously erroneously evicted caches, because everything was included: - .git/* - .mypy_cache/* - container/Dockerfile itself - __pycache__/* - .idea/* --- .dockerignore | 30 +++++++---- .github/workflows/container-smoke-test.yml | 3 ++ .pre-commit-config.yaml | 10 ++++ container/Dockerfile | 58 ++++++++-------------- container/start.sh | 2 +- setup.py | 2 +- version.py | 37 ++++++++------ 7 files changed, 78 insertions(+), 64 deletions(-) diff --git a/.dockerignore b/.dockerignore index cd0f9b005..6244185b0 100644 --- a/.dockerignore +++ b/.dockerignore @@ -1,11 +1,19 @@ -# Ignoring files that are specific to a given checkout, these change based on -# the user commands and not on the repository history. They are not important -# to determine the state of the repository and would invalidate the cache -# layer. -# -# - .git/logs/HEAD - command history -# - .git/index - binary file for the current index, very important for a -# working repository, not interesting for our image -# -.git/logs/HEAD -.git/index +# Ignore everything by default. Making as few files as possible part of default context +# ensures only relevant changes will evict layer cache. +* + +# Include source directories and files required for building. +!karapace +!requirements/*.txt +!setup.py +!version.py +!README.rst +!container/start.sh +!container/healthcheck.py + +# Ignore some files in source directories. 
+**/.DS_Store +**/Thumbs.db +**/*.pyc +**/*.pyo +**/__pycache__ diff --git a/.github/workflows/container-smoke-test.yml b/.github/workflows/container-smoke-test.yml index 484172e23..d0eeb8dfe 100644 --- a/.github/workflows/container-smoke-test.yml +++ b/.github/workflows/container-smoke-test.yml @@ -14,6 +14,9 @@ jobs: - name: Checkout repository uses: actions/checkout@v3 + - name: Build karapace/version.py + run: python version.py + - name: Build and start services run: docker compose --file=container/compose.yml up --build --wait --detach diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index bd0366b76..f35c49298 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -66,6 +66,16 @@ repos: - id: mypy pass_filenames: false +- repo: https://github.com/hadolint/hadolint + rev: v2.12.0 + hooks: + - id: hadolint-docker + alias: hadolint + args: + # This rule has false positives when using a mounted cache volume. + # https://github.com/hadolint/hadolint/issues/497 + - --ignore=DL3042 + - repo: https://github.com/PyCQA/pylint # Note: pre-commit autoupdate changes to an alpha version. Instead, manually find the # latest stable version here: https://github.com/pylint-dev/pylint/releases diff --git a/container/Dockerfile b/container/Dockerfile index 2e314273a..f2ca1a091 100644 --- a/container/Dockerfile +++ b/container/Dockerfile @@ -1,56 +1,42 @@ -# Builder image contains header files and additional dependencies necessary to -# generate wheel files. -FROM debian:stable-slim AS builder +# Current versions of avro and zstandard don't yet have wheels for 3.11. +FROM python:3.10.11-bullseye AS builder ARG KARAPACE_VERSION -# Build dependencies that need to be installed: -# - git: Used to install dependencies directly from their public repos (release -# not on PyPI). -# - python3-devel: Python .h files, used to compile C extensions (e.g. 
multidict) -# -# Build dependencies that need to be installed because of `--no-install-recommends`: -# - gcc: g++ and gcc to compile C extensions -# - python3-wheel: Library to generate .whl files -# - python3-setuptools: Packaging library -# -RUN apt-get update && \ - apt-get -y install --no-install-recommends git python3-dev python3-pip python3-setuptools python3-wheel gcc && \ - rm -rf /var/lib/apt/lists/* +# Create, activate, and enforce usage of virtualenv. +RUN python3 -m venv /venv +ENV PATH="/venv/bin:$PATH" +ENV PIP_REQUIRE_VIRTUALENV=true -# Copy the requirements.txt and generate wheels for each dependency. Using a -# separate command to use layer caching. +# Copy the requirements.txt and install dependencies in venv. Using a separate +# command to use layer caching. # # Note: the requirements.txt is pinned, if any of the dependencies is updated # the cache will be invalidated and the image regenerated, which is the # intended behavior. -# COPY ./requirements/requirements.txt /build/ -RUN pip3 wheel --requirement /build/requirements.txt --wheel-dir /build/dependencies-wheels +RUN --mount=type=cache,target=/root/.cache/pip \ + python3 -m pip install -r /build/requirements.txt COPY . /build/karapace-repo -RUN pip3 wheel --no-deps /build/karapace-repo --wheel-dir /build/karapace-wheel +RUN --mount=type=cache,target=/root/.cache/pip \ + python3 -m pip install /build/karapace-repo # Karapace image. 
-FROM debian:stable-slim AS karapace - -RUN groupadd --system karapace && \ - useradd --system --gid karapace karapace && \ - mkdir /opt/karapace /opt/karapace/runtime /var/log/karapace && \ - chown --recursive karapace:karapace /opt/karapace /var/log/karapace - -RUN apt-get update && \ - apt-get -y install --no-install-recommends python3-pip protobuf-compiler && \ - rm -rf /var/lib/apt/lists/* +FROM python:3.10.11-slim-bullseye AS karapace -COPY --from=builder /build/dependencies-wheels/*.whl /build/dependencies-wheels/ -RUN pip3 install --no-deps /build/dependencies-wheels/*.whl && rm -rf /build/dependencies-wheels/ +RUN groupadd --system karapace \ + && useradd --system --gid karapace karapace \ + && mkdir /opt/karapace /opt/karapace/runtime /var/log/karapace \ + && chown --recursive karapace:karapace /opt/karapace /var/log/karapace -COPY --from=builder /build/karapace-wheel/*.whl /build/karapace-wheel/ -RUN pip3 install --no-deps /build/karapace-wheel/*.whl && rm -rf /build/karapace-wheel/ +# Copy virtualenv from builder and activate it. +COPY --from=builder /venv /venv +ENV PATH="/venv/bin:$PATH" COPY ./container/start.sh /opt/karapace -RUN chmod 500 /opt/karapace/start.sh && chown karapace:karapace /opt/karapace/start.sh +RUN chmod 500 /opt/karapace/start.sh \ + && chown karapace:karapace /opt/karapace/start.sh COPY ./container/healthcheck.py /opt/karapace diff --git a/container/start.sh b/container/start.sh index 28c97948c..95ac86aa2 100755 --- a/container/start.sh +++ b/container/start.sh @@ -35,7 +35,7 @@ registry) [[ -n ${KARAPACE_REGISTRY_PORT+isset} ]] && export KARAPACE_PORT="${KARAPACE_REGISTRY_PORT}" [[ -n ${KARAPACE_REGISTRY_CLIENT_ID+isset} ]] && export KARAPACE_CLIENT_ID="${KARAPACE_REGISTRY_CLIENT_ID}" [[ -n ${KARAPACE_REGISTRY_GROUP_ID+isset} ]] && export KARAPACE_GROUP_ID="${KARAPACE_REGISTRY_GROUP_ID}" - # Map misspelt environment variable to correct spelling for backwards compatibility. 
+ # Map misspelled environment variables to correct spelling for backwards compatibility. [[ -n ${KARAPACE_REGISTRY_MASTER_ELIGIBITY+isset} ]] && export KARAPACE_MASTER_ELIGIBILITY="${KARAPACE_REGISTRY_MASTER_ELIGIBITY}" [[ -n ${KARAPACE_REGISTRY_MASTER_ELIGIBILITY+isset} ]] && export KARAPACE_MASTER_ELIGIBILITY="${KARAPACE_REGISTRY_MASTER_ELIGIBILITY}" [[ -n ${KARAPACE_REGISTRY_TOPIC_NAME+isset} ]] && export KARAPACE_TOPIC_NAME="${KARAPACE_REGISTRY_TOPIC_NAME}" diff --git a/setup.py b/setup.py index dd1a63639..d36bcb666 100644 --- a/setup.py +++ b/setup.py @@ -13,7 +13,7 @@ with open(readme_path, encoding="utf8") as fp: readme_text = fp.read() -version_for_setup_py = version.get_project_version("karapace/version.py") +version_for_setup_py = version.get_project_version() version_for_setup_py = ".dev".join(version_for_setup_py.split("-", 2)[:2]) setup( diff --git a/version.py b/version.py index cce82ef9e..64a193286 100644 --- a/version.py +++ b/version.py @@ -4,26 +4,35 @@ Copyright (c) 2023 Aiven Ltd See LICENSE for details """ -import importlib.util +from __future__ import annotations + +from typing import Final + import os +import pathlib import subprocess +version_file: Final = pathlib.Path(__file__).parent.resolve() / "karapace/version.py" + -def save_version(new_ver, old_ver, version_file): +def save_version(new_ver, old_ver): if not new_ver: return False - version_file = os.path.join(os.path.dirname(__file__), version_file) if not old_ver or new_ver != old_ver: - with open(version_file, mode="w", encoding="utf8") as fp: - fp.write(f'"""{__doc__}"""\n__version__ = "{new_ver}"\n') + version_file.write_text(f'"""{__doc__}"""\n__version__ = "{new_ver}"\n') return True -def get_project_version(version_file: str) -> str: - version_file_full_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), version_file) - module_spec = importlib.util.spec_from_file_location("verfile", version_file_full_path) - module = importlib.util.module_from_spec(module_spec) - 
file_ver = getattr(module, "__version__", None) +def from_version_file() -> str | None: + try: + import karapace.version + except ImportError: + return None + return karapace.version.__version__ + + +def get_project_version() -> str: + file_ver = from_version_file() version = os.getenv("KARAPACE_VERSION") if version is None: @@ -40,16 +49,14 @@ def get_project_version(version_file: str) -> str: git_ver = f"0.0.1-0-unknown-{git_ver}" version = git_ver - if save_version(version, file_ver, version_file): + if save_version(version, file_ver): return version if not file_ver: - raise RuntimeError(f"version not available from git or from file {version_file!r}") + raise RuntimeError(f"version not available from git or from file {str(version_file)!r}") return file_ver if __name__ == "__main__": - import sys - - get_project_version(sys.argv[1]) + get_project_version()