-
Notifications
You must be signed in to change notification settings - Fork 24
/
Copy pathDockerfile
205 lines (164 loc) · 6.87 KB
/
Dockerfile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
# Fetch and extract the TGI sources
FROM alpine AS tgi
# TGI version 3.0.0 by default
ARG TGI_VERSION=v3.0.0
RUN test -n ${TGI_VERSION:?}
RUN mkdir -p /tgi
ADD https://github.com/huggingface/text-generation-inference/archive/${TGI_VERSION}.tar.gz /tgi/sources.tar.gz
RUN tar -C /tgi -xf /tgi/sources.tar.gz --strip-components=1
# Build cargo components (adapted from TGI original Dockerfile)
# Note: we cannot use the cargo-chef base image as it uses python 3.11
FROM ubuntu:22.04 AS chef
RUN apt-get update -y \
&& apt-get install -y --no-install-recommends \
curl ca-certificates build-essential \
&& rm -rf /var/lib/apt/lists/* \
&& apt-get clean
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- --default-toolchain 1.80.1 --profile minimal -y
ENV PATH="/root/.cargo/bin:${PATH}"
RUN cargo install cargo-chef --locked
WORKDIR /usr/src
ARG CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse
FROM chef AS planner
COPY text-generation-inference/Cargo.toml Cargo.toml
COPY --from=tgi /tgi/Cargo.lock Cargo.lock
COPY --from=tgi /tgi/rust-toolchain.toml rust-toolchain.toml
COPY --from=tgi /tgi/proto proto
COPY --from=tgi /tgi/router router
COPY --from=tgi /tgi/backends backends
COPY --from=tgi /tgi/launcher launcher
RUN cargo chef prepare --recipe-path recipe.json
FROM chef AS builder
ARG ENABLE_GOOGLE_FEATURE
RUN echo "Google Feature Status: ${ENABLE_GOOGLE_FEATURE}"
RUN apt-get update -y \
&& DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
unzip python3-dev libssl-dev pkg-config \
&& rm -rf /var/lib/apt/lists/* \
&& apt-get clean
RUN PROTOC_ZIP=protoc-21.12-linux-x86_64.zip && \
curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v21.12/$PROTOC_ZIP && \
unzip -o $PROTOC_ZIP -d /usr/local bin/protoc && \
unzip -o $PROTOC_ZIP -d /usr/local 'include/*' && \
rm -f $PROTOC_ZIP
COPY text-generation-inference/Cargo.toml Cargo.toml
COPY --from=planner /usr/src/recipe.json recipe.json
RUN cargo chef cook --profile release-opt --recipe-path recipe.json
COPY --from=tgi /tgi/Cargo.lock Cargo.lock
COPY --from=tgi /tgi/rust-toolchain.toml rust-toolchain.toml
COPY --from=tgi /tgi/proto proto
COPY --from=tgi /tgi/router router
COPY --from=tgi /tgi/backends backends
COPY --from=tgi /tgi/launcher launcher
RUN if [ -n "$ENABLE_GOOGLE_FEATURE" ]; then \
cargo build --profile release-opt --features google; \
else \
cargo build --profile release-opt; \
fi
# Python base image
FROM ubuntu:22.04 AS base
RUN apt-get update -y \
&& apt-get install -y --no-install-recommends \
python3-pip \
python3-setuptools \
python-is-python3 \
git \
&& rm -rf /var/lib/apt/lists/* \
&& apt-get clean
RUN pip3 --no-cache-dir install --upgrade pip
ARG ENABLE_GOOGLE_FEATURE
ARG VERSION='0.2.3.dev0'
RUN test -n ${VERSION:?}
FROM base AS optimum-tpu-installer
COPY . /tmp/src
RUN if [ -n "$ENABLE_GOOGLE_FEATURE" ]; then \
# If we are building for GCP, we need to clone the optimum-tpu repo as this is built from the huggingface/Google-Cloud-Containers repository and not the huggingface/optimum-tpu repository
git clone https://github.com/huggingface/optimum-tpu.git /opt/optimum-tpu && \
cd /opt/optimum-tpu && git checkout v${VERSION}; \
fi && \
# Check if the optimum-tpu repo is cloned properly
cp -a /tmp/src /opt/optimum-tpu && \
if [ ! -d "/opt/optimum-tpu/optimum" ]; then \
echo "Error: Building from incorrect repository. This build must be run from optimum-tpu repo. If building from google-cloud-containers repo, set ENABLE_GOOGLE_FEATURE=1 to automatically clone optimum-tpu" && \
exit 1; \
fi
# Python server build image
FROM base AS pyserver
RUN apt-get update -y \
&& apt-get install -y --no-install-recommends \
make \
python3-venv \
&& rm -rf /var/lib/apt/lists/* \
&& apt-get clean
RUN install -d /pyserver
WORKDIR /pyserver
COPY --from=optimum-tpu-installer /opt/optimum-tpu/text-generation-inference/server server
COPY --from=tgi /tgi/proto proto
RUN pip3 install -r server/build-requirements.txt
RUN VERBOSE=1 BUILDDIR=/pyserver/build PROTODIR=/pyserver/proto VERSION=${VERSION} make -C server gen-server
# TPU base image (used for deployment)
FROM base AS tpu_base
ARG VERSION=${VERSION}
# Install system prerequisites
RUN apt-get update -y \
&& apt-get install -y --no-install-recommends \
libpython3.10 \
git \
gnupg2 \
wget \
curl \
&& rm -rf /var/lib/apt/lists/* \
&& apt-get clean
# Update pip
RUN pip install --upgrade pip
# Install HuggingFace packages
ARG TRANSFORMERS_VERSION='4.46.3'
ARG ACCELERATE_VERSION='1.1.1'
ARG SAFETENSORS_VERSION='0.4.5'
ARG ENABLE_GOOGLE_FEATURE
ENV HF_HUB_ENABLE_HF_TRANSFER=1
ENV VERSION=${VERSION}
ENV PORT=${ENABLE_GOOGLE_FEATURE:+8080}
ENV PORT=${PORT:-80}
ENV HF_HOME=${ENABLE_GOOGLE_FEATURE:+/tmp}
ENV HF_HOME=${HF_HOME:-/data}
# Install requirements for optimum-tpu, then for TGI then optimum-tpu
RUN python3 -m pip install hf_transfer safetensors==${SAFETENSORS_VERSION} typer
COPY --from=optimum-tpu-installer /opt/optimum-tpu /opt/optimum-tpu
RUN python3 /opt/optimum-tpu/optimum/tpu/cli.py install-jetstream-pytorch --yes
RUN python3 -m pip install -e /opt/optimum-tpu \
-f https://storage.googleapis.com/libtpu-releases/index.html
# Install router
COPY --from=builder /usr/src/target/release-opt/text-generation-router-v2 /usr/local/bin/text-generation-router
# Install launcher
COPY --from=builder /usr/src/target/release-opt/text-generation-launcher /usr/local/bin/text-generation-launcher
# Install python server
COPY --from=pyserver /pyserver/build/dist dist
RUN pip install dist/text_generation_server*.tar.gz
# TPU compatible image for Inference Endpoints
FROM tpu_base AS inference-endpoint
COPY text-generation-inference/docker/entrypoint.sh entrypoint.sh
RUN chmod +x entrypoint.sh
ENTRYPOINT ["./entrypoint.sh"]
FROM tpu_base AS google-cloud-containers
# Install Google specific components if ENABLE_GOOGLE_FEATURE is set
RUN if [ -n "$ENABLE_GOOGLE_FEATURE" ]; then \
apt-get update && \
DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
ca-certificates \
curl \
git && \
rm -rf /var/lib/apt/lists/* && \
echo "deb [signed-by=/usr/share/keyrings/cloud.google.gpg] https://packages.cloud.google.com/apt cloud-sdk main" \
| tee -a /etc/apt/sources.list.d/google-cloud-sdk.list && \
curl https://packages.cloud.google.com/apt/doc/apt-key.gpg \
| apt-key --keyring /usr/share/keyrings/cloud.google.gpg add - && \
apt-get update -y && \
apt-get install google-cloud-sdk -y; \
fi
# Custom entrypoint for Google
COPY --chmod=775 containers/tgi/tpu/${VERSION}/entrypoint.sh* entrypoint.sh
ENTRYPOINT ["./entrypoint.sh"]
# TPU compatible image
FROM tpu_base
ENTRYPOINT ["text-generation-launcher"]