Add docker image for BERT e2e inference task (#455)
* Add image for e2e bert inference testing, and all its dependencies

* Update git workflow with a new action to verify bert inference image builds

* Update Dockerfile name to not include prefix

* Update git workflow to account for changing of the Dockerfile name for bert inference

* Update bert-inference Dockerfile to install Python from source

* Update bert inference docker build to use relative path instead of absolute

* Revert bert inference Dockerfile path back to full path from relative
mattcjo authored Jul 16, 2024
1 parent c6f75cf commit f5c1831
Showing 4 changed files with 190 additions and 1 deletion.
7 changes: 6 additions & 1 deletion .github/workflows/ci.yaml
@@ -25,4 +25,9 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- run: docker build --file e2e2/test/images/nvidia/Dockerfile .
build-bert-inference:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- run: docker build --file e2e2/test/images/bert-inference/Dockerfile e2e2/test/images/bert-inference
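
For reference, the new job can be reproduced outside CI with the same build command. A minimal local sketch that shells out to Docker from Python, assuming Docker is installed and the repository root is the working directory (not part of the commit):

# Rough local equivalent of the build-bert-inference CI job; a sketch only.
import subprocess

subprocess.run(
    [
        "docker", "build",
        "--file", "e2e2/test/images/bert-inference/Dockerfile",
        "e2e2/test/images/bert-inference",
    ],
    check=True,  # raises CalledProcessError if the build fails, like a red CI job
)
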
69 changes: 69 additions & 0 deletions e2e2/test/images/bert-inference/Dockerfile
@@ -0,0 +1,69 @@
# Use the NVIDIA CUDA devel image as the parent image
FROM nvidia/cuda:12.5.0-devel-ubuntu22.04

# Set environment variable to disable interactive prompts
ENV DEBIAN_FRONTEND=noninteractive

# Dependency version numbers
ARG PYTHON=python3.10
ARG PYTHON_VERSION=3.10.12
ARG PIP=pip3

RUN apt-get update \
&& apt-get upgrade -y \
&& apt-get install -y --no-install-recommends \
build-essential \
ca-certificates \
cmake \
curl \
emacs \
git \
jq \
libopencv-dev \
software-properties-common \
wget \
unzip \
vim \
pkg-config \
gdb \
lcov \
libbz2-dev \
zlib1g-dev \
openssl \
libssl-dev \
libsqlite3-dev \
libgdbm-dev \
libc6-dev \
libncurses-dev \
tk-dev \
libffi-dev \
libcap-dev \
gnupg2 \
gpg-agent \
&& rm -rf /var/lib/apt/lists/* \
&& apt-get clean

# Install Python
RUN wget -q https://www.python.org/ftp/python/$PYTHON_VERSION/Python-$PYTHON_VERSION.tgz \
&& tar -xzf Python-$PYTHON_VERSION.tgz \
&& cd Python-$PYTHON_VERSION \
&& ./configure --enable-shared --prefix=/usr/local \
&& make -j $(nproc) && make install \
    && cd .. && rm -rf Python-$PYTHON_VERSION* \
&& ln -s /usr/local/bin/pip3 /usr/bin/pip \
&& ln -s /usr/local/bin/$PYTHON /usr/local/bin/python \
&& ${PIP} --no-cache-dir install --upgrade \
pip \
setuptools

# Set the working directory in the container
WORKDIR /app

# Copy only the necessary files into the container at /app
COPY infer.py /app/
COPY requirements.txt /app/

# Install any needed packages specified in requirements.txt
RUN python -m pip install --upgrade pip && \
pip install --no-cache-dir -r requirements.txt
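
A quick way to sanity-check the finished image is to confirm the pinned torch build and GPU visibility from inside a container, e.g. on a GPU host via docker run --gpus all. A sketch, not part of the commit (the script name and image tag would be illustrative):

# check.py - verify the Python stack inside the built image; a sketch only,
# e.g. copied into a container started with docker run --gpus all <tag>.
import torch

print(torch.__version__)          # expect a 2.3 build, per requirements.txt
print(torch.cuda.is_available())  # True on a GPU host run with --gpus all
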
112 changes: 112 additions & 0 deletions e2e2/test/images/bert-inference/infer.py
@@ -0,0 +1,112 @@
import os
import time
import torch
from transformers import BertForPreTraining, BertTokenizer
from torch.utils.data import DataLoader, TensorDataset


def create_dummy_data(tokenizer, num_samples=100, max_length=128):
# Create dummy input data
sentences = [
"This is a dummy sentence number {}".format(i) for i in range(num_samples)
]
tokenized_inputs = tokenizer(
sentences,
max_length=max_length,
padding="max_length",
truncation=True,
return_tensors="pt",
)

    # MLM task: randomly mask some tokens (the MLM labels are not used by this
    # inference benchmark, so they are discarded)
    mlm_probability = 0.15
    input_ids, _ = mask_tokens(
        tokenized_inputs.input_ids, tokenizer, mlm_probability
    )

# NSP task: create dummy pairs
next_sentence_labels = torch.randint(0, 2, (num_samples,))

return TensorDataset(
input_ids, tokenized_inputs.attention_mask, next_sentence_labels
)


def mask_tokens(inputs, tokenizer, mlm_probability):
labels = inputs.clone()
probability_matrix = torch.full(labels.shape, mlm_probability)
special_tokens_mask = [
tokenizer.get_special_tokens_mask(val, already_has_special_tokens=True)
for val in labels.tolist()
]
probability_matrix.masked_fill_(
torch.tensor(special_tokens_mask, dtype=torch.bool), value=0.0
)
masked_indices = torch.bernoulli(probability_matrix).bool()
labels[~masked_indices] = -100 # We only compute loss on masked tokens

inputs[masked_indices] = tokenizer.convert_tokens_to_ids(tokenizer.mask_token)

return inputs, labels


def run_inference(model, tokenizer, batch_size, mode):
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.eval()

dataset = create_dummy_data(tokenizer)
dataloader = DataLoader(dataset, batch_size=batch_size)

total_time = 0
total_batches = len(dataloader)

with torch.no_grad():
for batch in dataloader:
inputs, masks, next_sentence_labels = batch
inputs, masks, next_sentence_labels = (
inputs.to(device),
masks.to(device),
next_sentence_labels.to(device),
)

            start_time = time.time()
            outputs = model(
                input_ids=inputs,
                attention_mask=masks,
                next_sentence_label=next_sentence_labels,
            )
            if device.type == "cuda":
                # CUDA kernels launch asynchronously; synchronize so the
                # measured time covers the full forward pass
                torch.cuda.synchronize()
            end_time = time.time()

total_time += end_time - start_time

    avg_time_per_batch = total_time / total_batches
    # Use the actual number of samples; the final batch may be smaller than batch_size
    throughput = len(dataset) / total_time

print(f"Inference Mode: {mode}")
print(f"Average time per batch: {avg_time_per_batch:.4f} seconds")
print(f"Throughput: {throughput:.2f} samples/second")


def main():
# Verify GPU availability
if not torch.cuda.is_available():
        raise RuntimeError("GPU is not available. Exiting.")

print("GPU is available")

# Pre-download model and tokenizer
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
model = BertForPreTraining.from_pretrained("bert-base-uncased")

mode = os.environ.get("INFERENCE_MODE", "throughput").lower()
batch_size = 1 if mode == "latency" else 8

print(f"Running inference in {mode} mode with batch size {batch_size}")
run_inference(model, tokenizer, batch_size, mode)


if __name__ == "__main__":
main()
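
Because infer.py reads INFERENCE_MODE from the environment and exposes main(), it can also be driven from another Python process. A minimal sketch, assuming the script is importable as a module named infer (a hypothetical name) and a CUDA device is present:

# Run the benchmark in latency mode; a sketch only.
import os

os.environ["INFERENCE_MODE"] = "latency"  # batch size 1; "throughput" uses 8

import infer  # assumes infer.py is on the import path

infer.main()  # prints the mode, average time per batch, and throughput
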
3 changes: 3 additions & 0 deletions e2e2/test/images/bert-inference/requirements.txt
@@ -0,0 +1,3 @@
torch==2.3
transformers==4.29
numpy==1.23
