TRT-LLM backend build changes (#6406)
* Update url

* Debugging

* Debugging

* Update url

* Fix build for TRT-LLM backend

* Remove TRTLLM TRT and CUDA versions

* Fix up unused var

* Fix up dir name

* Fix cmake patch

* Remove previous TRT version

* Install required packages for example models

* Remove packages that are only needed for testing
krishung5 committed Oct 13, 2023
1 parent e1d7821 commit e25c242
Showing 1 changed file with 76 additions and 31 deletions.
build.py (76 additions, 31 deletions)
@@ -78,8 +78,6 @@
         "2023.0.0",  # Standalone OpenVINO
         "2.4.7",  # DCGM version
         "py310_23.1.0-1",  # Conda version
-        "9.1.0.1",  # TRT version for building TRT-LLM backend
-        "12.2",  # CUDA version for building TRT-LLM backend
         "0.2.0",  # vLLM version
     )
 }
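Note: deleting these two entries shifts every later slot of the version tuple down by two, which is why the vLLM lookup later in this diff moves from TRITON_VERSION_MAP[FLAGS.version][9] to [7]. A minimal sketch of the shift (the first four slots are elided placeholders, not the real values):

    before = ("...",) * 4 + ("2023.0.0", "2.4.7", "py310_23.1.0-1", "9.1.0.1", "12.2", "0.2.0")
    after = ("...",) * 4 + ("2023.0.0", "2.4.7", "py310_23.1.0-1", "0.2.0")
    assert before[9] == after[7] == "0.2.0"  # vLLM keeps its value but not its index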
@@ -884,19 +882,8 @@ def tensorrtllm_cmake_args(images):
             None,
             images["base"],
         ),
-        cmake_backend_arg(
-            "tensorrtllm",
-            "TENSORRT_VERSION",
-            None,
-            TRITON_VERSION_MAP[FLAGS.version][7],
-        ),
-        cmake_backend_arg(
-            "tensorrtllm",
-            "CUDA_VERSION",
-            None,
-            TRITON_VERSION_MAP[FLAGS.version][8],
-        ),
     ]
+    cargs.append(cmake_backend_enable("tensorrtllm", "TRITON_BUILD", True))
     return cargs
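Note: rather than forwarding TENSORRT_VERSION and CUDA_VERSION to CMake, the build now passes only a TRITON_BUILD switch. A sketch of what that call presumably renders, assuming cmake_backend_enable follows the same -D flag pattern as the other cmake_* helpers in build.py:

    def cmake_backend_enable(backend, name, value):
        # Render a CMake boolean cache entry for the given backend option.
        return "-D{}:BOOL={}".format(name, "ON" if value else "OFF")

    print(cmake_backend_enable("tensorrtllm", "TRITON_BUILD", True))
    # -DTRITON_BUILD:BOOL=ON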


@@ -1315,32 +1302,75 @@ def dockerfile_prepare_container_linux(argmap, backends, enable_gpu, target_mach
     pip3 install --upgrade numpy && \
     rm -rf /var/lib/apt/lists/*
 """
     # FIXME: Use the postbuild script here
     # Add dependencies needed for tensorrtllm backend
     if "tensorrtllm" in backends:
         be = "tensorrtllm"
-        # FIXME: Update the url
-        url = "https://gitlab-master.nvidia.com/krish/tensorrtllm_backend/-/raw/{}/tools/gen_trtllm_dockerfile.py".format(
-            backends[be]
-        )
-        response = requests.get(url)
-        spec = importlib.util.spec_from_loader(
-            "trtllm_buildscript", loader=None, origin=url
-        )
-        trtllm_buildscript = importlib.util.module_from_spec(spec)
-        exec(response.content, trtllm_buildscript.__dict__)
-        df += trtllm_buildscript.create_postbuild(
-            argmap["TRT_LLM_TRT_VERSION"], argmap["TRT_LLM_CUDA_VERSION"]
-        )
+        # # FIXME: Update the url
+        # url = "https://gitlab-master.nvidia.com/ftp/tekit_backend/-/raw/{}/tools/gen_trtllm_dockerfile.py".format(
+        #     backends[be]
+        # )
+
+        # response = requests.get(url)
+        # spec = importlib.util.spec_from_loader(
+        #     "trtllm_buildscript", loader=None, origin=url
+        # )
+        # trtllm_buildscript = importlib.util.module_from_spec(spec)
+        # exec(response.content, trtllm_buildscript.__dict__)
+        # df += trtllm_buildscript.create_postbuild(
+        #     backends[be]  # repo tag
+        # )
+        df += """
+WORKDIR /workspace
+
+# Remove previous TRT installation
+RUN apt-get remove --purge -y tensorrt* libnvinfer*
+RUN pip uninstall -y tensorrt
+
+# Install new version of TRT using the script from TRT-LLM
+RUN apt-get update && apt-get install -y --no-install-recommends python-is-python3
+RUN git clone --single-branch --depth=1 -b {} https://{}:{}@gitlab-master.nvidia.com/ftp/tekit_backend.git tensorrtllm_backend
+RUN cd tensorrtllm_backend && git submodule update --init --recursive
+RUN cp tensorrtllm_backend/tensorrt_llm/docker/common/install_tensorrt.sh /tmp/
+RUN rm -fr tensorrtllm_backend
+""".format(
+            backends[be],
+            os.environ["REMOVE_ME_TRTLLM_USERNAME"],
+            os.environ["REMOVE_ME_TRTLLM_TOKEN"],
+        )
+
+        df += """
+RUN bash /tmp/install_tensorrt.sh && rm /tmp/install_tensorrt.sh
+ENV TRT_ROOT=/usr/local/tensorrt
+
+# Remove TRT contents that are not needed in runtime
+RUN ARCH="$(uname -i)" && \
+    rm -fr ${TRT_ROOT}/bin ${TRT_ROOT}/targets/${ARCH}-linux-gnu/bin ${TRT_ROOT}/data && \
+    rm -fr ${TRT_ROOT}/doc ${TRT_ROOT}/onnx_graphsurgeon ${TRT_ROOT}/python && \
+    rm -fr ${TRT_ROOT}/samples ${TRT_ROOT}/targets/${ARCH}-linux-gnu/samples
+
+# Install required packages for TRT-LLM models
+RUN python3 -m pip install --upgrade pip && \
+    pip3 install transformers && \
+    pip3 install torch
+
+# Uninstall unused nvidia packages
+RUN if pip freeze | grep -q "nvidia.*"; then \
+        pip freeze | grep "nvidia.*" | xargs pip uninstall -y; \
+    fi
+RUN pip cache purge
+
+ENV LD_LIBRARY_PATH=/usr/local/tensorrt/lib/:/opt/tritonserver/backends/tensorrtllm:$LD_LIBRARY_PATH
+"""
 
     if "vllm" in backends:
         # [DLIS-5606] Build Conda environment for vLLM backend
         # Remove Pip install once vLLM backend moves to Conda environment.
         df += """
 # vLLM needed for vLLM backend
 RUN pip3 install vllm=={}
 """.format(
-            TRITON_VERSION_MAP[FLAGS.version][9]
+            TRITON_VERSION_MAP[FLAGS.version][7]
         )
 
     df += """
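Note: the Dockerfile text is deliberately split across two df += """...""" strings (my reading; the diff does not say so): str.format() treats every brace pair as a placeholder, so lines containing a literal ${TRT_ROOT} cannot share a string with the .format() call that injects the branch name and credentials:

    template = "RUN git clone --single-branch -b {} https://example.com/repo.git"
    literal = "RUN rm -fr ${TRT_ROOT}/bin"  # contains literal braces
    print(template.format("r23.10"))        # fine: "{}" is the only placeholder
    # literal.format("r23.10") would raise KeyError: 'TRT_ROOT', hence the
    # second, unformatted string for everything referencing ${TRT_ROOT}.

Building with this backend also requires REMOVE_ME_TRTLLM_USERNAME and REMOVE_ME_TRTLLM_TOKEN to be set, since os.environ[...] raises KeyError when a variable is missing.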
@@ -1505,8 +1535,6 @@ def create_build_dockerfiles(
         if FLAGS.version is None or FLAGS.version not in TRITON_VERSION_MAP
         else TRITON_VERSION_MAP[FLAGS.version][6],
     }
-    dockerfileargmap["TRT_LLM_TRT_VERSION"] = TRITON_VERSION_MAP[FLAGS.version][7]
-    dockerfileargmap["TRT_LLM_CUDA_VERSION"] = TRITON_VERSION_MAP[FLAGS.version][8]
 
     # For CPU-only image we need to copy some cuda libraries and dependencies
     # since we are using PyTorch and TensorFlow containers that
@@ -1797,6 +1825,16 @@ def tensorrtllm_prebuild(cmake_script):
     cmake_script.cmd("export TRT_ROOT=/usr/local/tensorrt")
     cmake_script.cmd("export ARCH=$(uname -m)")
 
+    # FIXME: Update the file structure to the one Triton expects. This is a temporary fix
+    # to get the build working for r23.10.
+    # Uncomment the patch once moving to the GitHub repo
+    # cmake_script.cmd(
+    #     "patch tensorrtllm/inflight_batcher_llm/CMakeLists.txt < tensorrtllm/inflight_batcher_llm/CMakeLists.txt.patch"
+    # )
+    cmake_script.cmd("mv tensorrtllm/inflight_batcher_llm/src tensorrtllm")
+    cmake_script.cmd("mv tensorrtllm/inflight_batcher_llm/cmake tensorrtllm")
+    cmake_script.cmd("mv tensorrtllm/inflight_batcher_llm/CMakeLists.txt tensorrtllm")
+
 
 def backend_build(
     be,
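Note: the three mv commands hoist the backend sources from inflight_batcher_llm/ to the top of the clone, matching the FIXME above about the layout Triton expects. The same restructuring in plain Python (a sketch; it assumes the clone sits in the current working directory):

    import shutil

    # tensorrtllm/inflight_batcher_llm/{src,cmake,CMakeLists.txt} -> tensorrtllm/
    for entry in ("src", "cmake", "CMakeLists.txt"):
        shutil.move("tensorrtllm/inflight_batcher_llm/" + entry, "tensorrtllm")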
@@ -1820,8 +1858,15 @@ def backend_build(
     cmake_script.cwd(build_dir)
     # FIXME: Use GitHub repo
     if be == "tensorrtllm":
-        cmake_script.gitclone(
-            backend_repo(be), tag, be, "https://gitlab-master.nvidia.com/krish"
-        )
+        # cmake_script.gitclone(
+        #     backend_repo("tekit"), tag, be, "https://gitlab-master.nvidia.com/ftp"
+        # )
+        cmake_script.cmd(
+            "git clone --single-branch --depth=1 -b {} https://{}:{}@gitlab-master.nvidia.com/ftp/tekit_backend.git tensorrtllm".format(
+                tag,
+                os.environ["REMOVE_ME_TRTLLM_USERNAME"],
+                os.environ["REMOVE_ME_TRTLLM_TOKEN"],
+            )
+        )
     else:
         cmake_script.gitclone(backend_repo(be), tag, be, github_organization)
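Note: like the Dockerfile section above, this interim clone pulls the private tekit_backend GitLab repository with credentials read from the environment, so both REMOVE_ME_TRTLLM_* variables must be exported before running build.py. The commented-out gitclone call also hints at how backend_repo() resolves names; a sketch under that assumption:

    def backend_repo(be):
        # Assumption from the call site above: backend_repo("tekit") -> "tekit_backend"
        return "{}_backend".format(be)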