-
Notifications
You must be signed in to change notification settings - Fork 2k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
refactoring: reimplement Docker strategy (#3162)
* setup base images * add cpu flavor * use the same Dockerfile for cpu and gpu * better naming, add docs * add docker workflow * add missing image input * change cwd for bake * also push api images * try conditional tagging for releases * revert testing code * update docker readme * document variable override * use Python 3.10 * allow empty HAYSTACK_EXTRAS * Apply suggestions from code review Co-authored-by: Sara Zan <[email protected]> * remove repo description step, can't make it work so far * add docs to the last step as it's tricky * manage tags for the newest images * tests are passing, checking in the last bit Co-authored-by: Sara Zan <[email protected]>
- Loading branch information
Showing
5 changed files
with
287 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,93 @@ | ||
# Builds the Haystack Docker images with `docker buildx bake` (bake file in
# `docker/`) and pushes them to Docker Hub. Triggered by pushes to `main`,
# by `v*` tags and by manual dispatch.
name: Release Docker images

on:
  workflow_dispatch:
  push:
    branches:
      - main
    tags:
      - "v*"

env:
  DOCKER_REPO_NAME: deepset/haystack

jobs:
  build-and-push:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout
        uses: actions/checkout@v3

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v2

      - name: Login to DockerHub
        uses: docker/login-action@v2
        with:
          username: ${{ secrets.DOCKER_HUB_USER }}
          password: ${{ secrets.DOCKER_HUB_TOKEN }}

      # Only `outputs.version` of this step is consumed by the steps below,
      # where it becomes the suffix of the image tags.
      - name: Docker meta
        id: meta
        uses: docker/metadata-action@v4
        with:
          # Action inputs are not passed through a shell, so a `$VAR` reference
          # would be taken literally; read the value from the `env` context.
          images: ${{ env.DOCKER_REPO_NAME }}

      - name: Build base images
        uses: docker/bake-action@v2
        env:
          IMAGE_TAG_SUFFIX: ${{ steps.meta.outputs.version }}
        with:
          workdir: docker
          targets: base
          push: true

      # The api images are built FROM the base images pushed by the previous
      # step, hence the same suffix for both variables.
      - name: Build api images
        uses: docker/bake-action@v2
        env:
          IMAGE_TAG_SUFFIX: ${{ steps.meta.outputs.version }}
          BASE_IMAGE_TAG_SUFFIX: ${{ steps.meta.outputs.version }}
        with:
          workdir: docker
          targets: api
          push: true

      # Tag builds only: look up the most recent published release.
      - name: Get latest version of Haystack
        id: latest-version
        uses: pozetroninc/github-action-get-latest-release@master
        if: startsWith(github.ref, 'refs/tags/')
        with:
          repository: ${{ github.repository }}
          excludes: prerelease, draft

      # Tag builds only: compare the tag being built with that release.
      - name: Compare current version with latest
        uses: madhead/semver-utils@latest
        id: version
        if: startsWith(github.ref, 'refs/tags/')
        with:
          # Version being built
          version: ${{ github.ref_name }}
          # Compare to latest
          compare-to: ${{ steps.latest-version.outputs.release }}

      # Debug output. The values are handed over through the environment and
      # quoted: this step only runs when the comparison result is `>`, and an
      # unquoted `>` pasted into the script is parsed by the shell as a
      # redirection, which is a syntax error.
      - name: Use latest
        if: steps.version.outputs.comparison-result == '>'
        env:
          COMPARISON_RESULT: ${{ steps.version.outputs.comparison-result }}
          LATEST_RELEASE: ${{ steps.latest-version.outputs.release }}
        run: |
          echo "$COMPARISON_RESULT"
          echo "$LATEST_RELEASE"

      # This step should only run when the tag being built is newer than the
      # latest release, so that we can tag the most recent image without the
      # version number.
      # For example, if the previous step builds `deepset/haystack:cpu-1.8.0`,
      # this builds `deepset/haystack:cpu`
      - name: Build api images no version in tag
        uses: docker/bake-action@v2
        if: steps.version.outputs.comparison-result == '>'
        env:
          IMAGE_TAG_SUFFIX: ${{ steps.meta.outputs.version }}
          BASE_IMAGE_TAG_SUFFIX: ${{ steps.meta.outputs.version }}
        with:
          workdir: docker
          targets: api-latest
          push: true
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
# Image that serves Haystack's `rest_api` with gunicorn on top of a prebuilt
# `deepset/haystack` base image.

# Tag of the `deepset/haystack` base image to build on. There is no default:
# it must be supplied as a build arg (docker-bake.hcl does so).
ARG base_image_tag

FROM deepset/haystack:${base_image_tag}

# Create a folder for the /file-upload API endpoint with write permissions.
# NOTE(review): the folder is world-writable (777); consider tightening this
# if the user the container runs as is known.
RUN mkdir -p /opt/file-upload && chmod 777 /opt/file-upload

# Tell rest_api which folder to use for uploads
ENV FILE_UPLOAD_PATH="/opt/file-upload"

EXPOSE 8000

# The port is spelled out in the bind address so that it visibly matches the
# EXPOSE instruction above instead of relying on gunicorn's default port.
CMD ["gunicorn", "rest_api.application:app", "-b", "0.0.0.0:8000", "-k", "uvicorn.workers.UvicornWorker", "--workers", "1", "--timeout", "180"]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
# Two-stage build of the Haystack base image. The `build-image` stage installs
# the toolchain, downloads `pdftotext` and installs Haystack into a virtualenv;
# the `final` stage copies only the virtualenv and the `pdftotext` binary.

# Images used by the two stages. Neither has a default: both must be supplied
# as build args.
# NOTE: `base_immage` is misspelled, but the name is the build-arg contract
# with docker-bake.hcl; rename it only together with the caller.
ARG build_image
ARG base_immage

FROM $build_image AS build-image

# Toolchain and system libraries for the build. The apt lists are removed in
# the same layer that creates them.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        curl \
        gcc \
        git \
        libtesseract-dev \
        poppler-utils \
        tesseract-ocr \
    && rm -rf /var/lib/apt/lists/*

# Install PDF converter.
# --fail makes curl exit non-zero on an HTTP error instead of saving the error
# page as the tarball; the archive is deleted once the binary is extracted.
RUN curl -fsSLO https://dl.xpdfreader.com/xpdf-tools-linux-4.04.tar.gz && \
    tar -xvf xpdf-tools-linux-4.04.tar.gz && \
    cp xpdf-tools-linux-4.04/bin64/pdftotext /opt && \
    rm -rf xpdf-tools-linux-4.04 xpdf-tools-linux-4.04.tar.gz

# Build args are declared right before their first use so that changing them
# does not invalidate the cached layers above.
# Branch or tag of the Haystack repo to install.
ARG haystack_version

# Shallow clone Haystack repo, we'll install from the local sources
RUN git clone --depth=1 --branch="${haystack_version}" https://github.com/deepset-ai/haystack.git /opt/haystack
WORKDIR /opt/haystack

# Use a virtualenv we can copy over the next build stage
RUN python -m venv --system-site-packages /opt/venv
ENV PATH="/opt/venv/bin:$PATH"

# pip extras specifier appended to the local path (e.g. `[a,b]`); may be empty.
ARG haystack_extras
# URL handed to `pip -f` (find-links) when installing torch-scatter.
ARG torch_scatter

# The expansions are quoted: an extras list such as `.[a,b]` is otherwise a
# shell glob pattern.
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir ".${haystack_extras}" && \
    pip install --no-cache-dir ./rest_api && \
    pip install --no-cache-dir torch-scatter -f "$torch_scatter"

FROM $base_immage AS final

# Only the virtualenv and the pdftotext binary are carried over from the
# build stage.
COPY --from=build-image /opt/venv /opt/venv
COPY --from=build-image /opt/pdftotext /usr/local/bin/

ENV PATH="/opt/venv/bin:$PATH"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
# Haystack Docker image | ||
|
||
Haystack is an end-to-end framework that enables you to build powerful and production-ready | ||
pipelines for different search use cases. The Docker image comes with a web service | ||
configured to serve Haystack's `rest_api` to ease pipelines' deployments in containerized | ||
environments. | ||
|
||
Start the Docker container binding the TCP port `8000` locally: | ||
```sh | ||
docker run -p 8000:8000 deepset/haystack | ||
``` | ||
|
||
If you need the container to access other services available in the host: | ||
```sh | ||
docker run -p 8000:8000 --network="host" deepset/haystack | ||
``` | ||
|
||
## Image variants | ||
|
||
The Docker image comes in two variants: | ||
- `haystack:cpu-<version>`: this image is smaller but doesn't support GPU | ||
- `haystack:gpu-<version>`: this image comes with the CUDA runtime and is capable of running on GPUs | ||
|
||
|
||
## Image development | ||
|
||
Images are built with BuildKit and we use `bake` to orchestrate the process. | ||
You can build a specific image by simply running: | ||
```sh | ||
docker buildx bake gpu | ||
``` | ||
|
||
You can override any `variable` defined in the `docker-bake.hcl` file and build custom | ||
images, for example if you want to use a branch from the Haystack repo: | ||
```sh | ||
HAYSTACK_VERSION=mybranch_or_tag BASE_IMAGE_TAG_SUFFIX=latest docker buildx bake gpu --no-cache | ||
``` | ||
|
||
# License | ||
|
||
View [license information](https://github.com/deepset-ai/haystack/blob/main/LICENSE) for | ||
the software contained in this image. | ||
|
||
As with all Docker images, these likely also contain other software which may be under | ||
other licenses (such as Bash, etc from the base distribution, along with any direct or | ||
indirect dependencies of the primary software being contained). | ||
|
||
As for any pre-built image usage, it is the image user's responsibility to ensure that any | ||
use of this image complies with any relevant licenses for all software contained within. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,94 @@ | ||
# Bake file for the Haystack Docker images. Every `variable` below can be
# overridden through an environment variable of the same name.

# Branch or tag of the Haystack repo, passed to Dockerfile.base as the
# `haystack_version` build arg.
variable "HAYSTACK_VERSION" {
  default = "main"
}

# NOTE(review): not referenced by any target in this file.
variable "GITHUB_REF" {
  default = ""
}

# Repository part of every image tag produced by this file.
variable "IMAGE_NAME" {
  default = "deepset/haystack"
}

# Suffix appended to the tags of the images being built.
variable "IMAGE_TAG_SUFFIX" {
  default = "local"
}

# Suffix of the base image tag that the api images are built on.
variable "BASE_IMAGE_TAG_SUFFIX" {
  default = "local"
}

# pip extras specifier for Haystack; when empty, each base target falls back
# to its own default list.
variable "HAYSTACK_EXTRAS" {
  default = ""
}

group "base" {
  targets = ["base-cpu", "base-gpu"]
}

group "api" {
  targets = ["cpu", "gpu"]
}

group "api-latest" {
  targets = ["cpu-latest", "gpu-latest"]
}

group "all" {
  targets = ["base", "api"]
}

# NOTE(review): no target in this file inherits from this one.
target "docker-metadata-action" {}

# CPU base image. The target is named `base-cpu` rather than `base` so that it
# does not share a name with the `base` group; the image tag keeps the
# `base-<suffix>` form that the `cpu` target builds on.
target "base-cpu" {
  dockerfile = "Dockerfile.base"
  tags = ["${IMAGE_NAME}:base-${IMAGE_TAG_SUFFIX}"]
  args = {
    build_image = "python:3.10-slim"
    # The spelling must match the ARG name declared in Dockerfile.base
    base_immage = "python:3.10-slim"
    haystack_version = "${HAYSTACK_VERSION}"
    haystack_extras = notequal("",HAYSTACK_EXTRAS) ? "${HAYSTACK_EXTRAS}" : "[docstores,crawler,preprocessing,ocr,onnx,beir]"
    torch_scatter = "https://data.pyg.org/whl/torch-1.12.0+cpu.html"
  }
}

# GPU base image, built from the same Dockerfile on top of a CUDA runtime image.
target "base-gpu" {
  dockerfile = "Dockerfile.base"
  tags = ["${IMAGE_NAME}:base-gpu-${IMAGE_TAG_SUFFIX}"]
  args = {
    build_image = "pytorch/pytorch:1.12.1-cuda11.3-cudnn8-runtime"
    # The spelling must match the ARG name declared in Dockerfile.base
    base_immage = "pytorch/pytorch:1.12.1-cuda11.3-cudnn8-runtime"
    haystack_version = "${HAYSTACK_VERSION}"
    haystack_extras = notequal("",HAYSTACK_EXTRAS) ? "${HAYSTACK_EXTRAS}" : "[docstores-gpu,crawler,preprocessing,ocr,onnx-gpu,beir]"
    torch_scatter = "https://data.pyg.org/whl/torch-1.12.1%2Bcu113.html"
  }
}

# REST API image on top of the CPU base image.
target "cpu" {
  dockerfile = "Dockerfile.api"
  tags = ["${IMAGE_NAME}:cpu-${IMAGE_TAG_SUFFIX}"]
  args = {
    base_image_tag = "base-${BASE_IMAGE_TAG_SUFFIX}"
  }
}

# Same build as `cpu`, tagged without a version suffix.
target "cpu-latest" {
  inherits = ["cpu"]
  tags = ["${IMAGE_NAME}:cpu"]
}

# REST API image on top of the GPU base image.
target "gpu" {
  dockerfile = "Dockerfile.api"
  tags = ["${IMAGE_NAME}:gpu-${IMAGE_TAG_SUFFIX}"]
  args = {
    base_image_tag = "base-gpu-${BASE_IMAGE_TAG_SUFFIX}"
  }
  platforms = [
    "linux/amd64"
  ]
}

# Same build as `gpu`, tagged without a version suffix.
target "gpu-latest" {
  inherits = ["gpu"]
  tags = ["${IMAGE_NAME}:gpu"]
}