From 176417b84571f02f24aeae4afb8642a0700c771e Mon Sep 17 00:00:00 2001 From: Viktor Bozhinov <45173816+VKTB@users.noreply.github.com> Date: Thu, 31 Aug 2023 13:00:00 +0100 Subject: [PATCH] Containerize application and configure GitHub Actions to build and push Docker image to Harbor (#426) * Set python image #354 * Upgrade pip #354 * Install Poetry #354 * Install a different version of setuptools #354 * Install Gunicorn #354 * Install the app dependencies #354 * Serve the app on a Gunicorn server #354 * Pin Poetry version #354 * Define Actions job for building and pushing Docker image to Harbor #355 * Configure job to only run after other jobs succeed #355 * Configure job to login to Harbor #355 * Configure job to extract Docker metadata #355 * Configure job to build image #355 * Configure job to push image to Harbor on pushes to k8s-deployment branch #355 * Add job documentation and TODOs #355 * Configure dependabot to maintain GH Actions dependencies #355 * Add TODO for branch name of push events #355 * Update Dockerfile and add entrypoint script * Use specific python and alpine versions in the base image * Pin python package versions in Dockerfile * Use a cache mount to speed up pip and poetry * Comment the RUN step * Move things out of the datagateway-api-run directory * Remove workaround that is no longer needed * log_location value should not be quoted * Only copy necessary files to build container * Improve readability of RUN instructions * Use a temp file instead of sed -i in entrypoint script * Create a symlink to the installed python module * Address TODOs * Change default value of ICAT_CHECK_CERT ENV * Upgrade and pin actions to commit SHAs * ci(docker): bump actions/checkout to 3.5.3 in docker job * Update README --------- Co-authored-by: Alan Kyffin --- .github/workflows/ci-build.yml | 32 +++++++++++++++++++ Dockerfile | 58 ++++++++++++++++++++++++++++++++++ README.md | 26 +++++++++++++++ docker/docker-entrypoint.sh | 17 ++++++++++ 4 files changed, 133 
insertions(+)
 create mode 100644 Dockerfile
 create mode 100755 docker/docker-entrypoint.sh

diff --git a/.github/workflows/ci-build.yml b/.github/workflows/ci-build.yml
index a0526ab3..30d302d0 100644
--- a/.github/workflows/ci-build.yml
+++ b/.github/workflows/ci-build.yml
@@ -360,3 +360,36 @@ jobs:
       # different to SciGateway preprod
       - name: Diff SQL dumps
         run: diff -s ~/generator_script_dump_main.sql ~/generator_script_dump_1.sql
+
+  docker:
+    # This job triggers only if all the other jobs succeed and does different things depending on the context.
+    # The job builds the Docker image in all cases but only logs in to Harbor and pushes the image when
+    # something is pushed to the main branch (registry secrets are not available to fork/Dependabot PRs).
+    needs: [tests, linting, formatting, safety, generator-script-testing]
+    name: Docker
+    runs-on: ubuntu-20.04
+    steps:
+      - name: Check out repo
+        uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9 # v3.5.3
+
+      - name: Login to Harbor
+        if: ${{ github.event_name == 'push' && github.ref == 'refs/heads/main' }}
+        uses: docker/login-action@f4ef78c080cd8ba55a85445d5b36e214a81df20a # v2.1.0
+        with:
+          registry: harbor.stfc.ac.uk/datagateway
+          username: ${{ secrets.HARBOR_USERNAME }}
+          password: ${{ secrets.HARBOR_TOKEN }}
+
+      - name: Extract metadata (tags, labels) for Docker
+        id: meta
+        uses: docker/metadata-action@c4ee3adeed93b1fa6a762f209fb01608c1a22f1e # v4.4.0
+        with:
+          images: harbor.stfc.ac.uk/datagateway/datagateway-api
+
+      - name: ${{ github.event_name == 'push' && github.ref == 'refs/heads/main' && 'Build and push Docker image to Harbor' || 'Build Docker image' }}
+        uses: docker/build-push-action@3b5e8027fcad23fda98b2e3ac259d8d67585f671 # v4.0.0
+        with:
+          context: .
+          push: ${{ github.event_name == 'push' && github.ref == 'refs/heads/main' }}
+          tags: ${{ steps.meta.outputs.tags }}
+          labels: ${{ steps.meta.outputs.labels }}
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 00000000..93bcc5e4
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,58 @@
+# Dockerfile to build and serve datagateway-api
+
+# Build stage: build the datagateway_api wheel with Poetry
+FROM python:3.11-alpine3.17 AS builder
+
+WORKDIR /datagateway-api-build
+
+COPY README.md poetry.lock pyproject.toml ./
+COPY datagateway_api/ datagateway_api/
+
+RUN --mount=type=cache,target=/root/.cache \
+    set -eux; \
+    \
+    python3 -m pip install 'poetry~=1.3.2'; \
+    poetry build;
+
+
+# Install & run stage: install the wheel from the build stage and serve it with Gunicorn
+FROM python:3.11-alpine3.17
+
+WORKDIR /datagateway-api-run
+
+COPY --from=builder /datagateway-api-build/dist/datagateway_api-*.whl /tmp/
+
+RUN --mount=type=cache,target=/root/.cache \
+    set -euxo pipefail; \
+    \
+    python3 -m pip install \
+        'gunicorn~=20.1.0' \
+        /tmp/datagateway_api-*.whl; \
+    \
+    # Create a symlink to the installed python module (pipefail aborts the build if pip show fails) \
+    DATAGATEWAY_API_LOCATION="$(python3 -m pip show datagateway_api | awk '/^Location:/ { print $2 }')"; \
+    ln -s "$DATAGATEWAY_API_LOCATION/datagateway_api/" datagateway_api; \
+    \
+    # Create config.yaml and search_api_mapping.json from their .example files \
+    cp datagateway_api/config.yaml.example datagateway_api/config.yaml; \
+    cp datagateway_api/search_api_mapping.json.example datagateway_api/search_api_mapping.json; \
+    \
+    # Create a non-root user to run as \
+    addgroup -S datagateway-api; \
+    adduser -S -D -G datagateway-api -H -h /datagateway-api-run datagateway-api; \
+    \
+    # Change ownership of config.yaml - the entrypoint script will need to edit it \
+    chown datagateway-api:datagateway-api datagateway_api/config.yaml;
+
+USER datagateway-api
+
+ENV ICAT_URL="http://localhost"
+ENV ICAT_CHECK_CERT="false"
+ENV LOG_LOCATION="/dev/stdout"
+
+COPY docker/docker-entrypoint.sh /usr/local/bin/
+ENTRYPOINT ["docker-entrypoint.sh"]
+
+# Serve the application 
using gunicorn - production ready WSGI server +CMD ["gunicorn", "-b", "0.0.0.0:8000", "datagateway_api.wsgi"] +EXPOSE 8000 diff --git a/README.md b/README.md index 51b1f713..7e5d91a9 100644 --- a/README.md +++ b/README.md @@ -375,6 +375,32 @@ If using Python 3.10, please use Payara 5 on the ICAT stack which the API is bei pointed at. There is a known issue when making HTTPS connections to Payara (via Python ICAT). +It is also possible to run the API inside Docker. The `Dockerfile` can be used to build +a Docker image which in turn can be used to create a container. The `Dockerfile` is +configured to create a production image and runs a Gunicorn server on port `8000` when a +container is started. Environment variables have also been defined in the `Dockerfile` +to allow for values to be passed at runtime to future running containers. These values +are used by the `docker/docker-entrypoint.sh` script to update the config values in the +`config.yaml` file. The environment variables are: +- `ICAT_URL` (Default value: `http://localhost`) +- `ICAT_CHECK_CERT` (Default value: `false`) +- `LOG_LOCATION` (Default value: `/dev/stdout`) + +To build an image, run: +```bash +docker build -t datagateway_api_image . +``` + +To start a container on port `8000` from the image that you just built, run: +```bash +docker run -p 8000:8000 --name datagateway_api_container datagateway_api_image +``` + +If you want to pass values for the environment variables then instead run: +```bash +docker run -p 8000:8000 --name datagateway_api_container --env ICAT_URL=https://127.0.0.1:8181 --env ICAT_CHECK_CERT=true --env LOG_LOCATION=/datagateway-api-run/logs.log datagateway_api_image +``` + ## DataGateway API Authentication Each request requires a valid session ID to be provided in the Authorization header. 
diff --git a/docker/docker-entrypoint.sh b/docker/docker-entrypoint.sh new file mode 100755 index 00000000..19b9ec97 --- /dev/null +++ b/docker/docker-entrypoint.sh @@ -0,0 +1,17 @@ +#!/bin/sh -eu + +# Use a tempfile instead of sed -i so that only the file, not the directory needs to be writable +TEMPFILE="$(mktemp)" + +# Set values in config.yaml from environment variables +# No quotes for icat_check_cert because it's boolean +sed -e "s|icat_url: \".*\"|icat_url: \"$ICAT_URL\"|" \ + -e "s|icat_check_cert: .*|icat_check_cert: $ICAT_CHECK_CERT|" \ + -e "s|log_location: \".*\"|log_location: \"$LOG_LOCATION\"|" \ + datagateway_api/config.yaml > "$TEMPFILE" + +cat "$TEMPFILE" > datagateway_api/config.yaml +rm "$TEMPFILE" + +# Run the CMD instruction +exec "$@"