Fix TensorRT-LLM (#7142)
* TRT-LLM build

* Update versions

* Remove statement, as unused

* Remove cache

* add cmake option to set CXX11 ABI

* Mchornyi krish 24.04 (#7149)

* Enable TensorRT-LLM build outside of CMake

* TensorRT-LLM requires lower version of cuDNN

* Format

---------

Co-authored-by: krishung5 <[email protected]>
mc-nv and krishung5 committed Apr 25, 2024
1 parent ddd6c4b commit b10c3c6
Showing 1 changed file with 42 additions and 8 deletions: build.py
@@ -1,5 +1,5 @@
#!/usr/bin/env python3
-# Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
@@ -819,6 +819,10 @@ def fastertransformer_cmake_args():


def tensorrtllm_cmake_args(images):
cmake_script.cmd("apt-get update && apt-get install -y libcudnn8-dev && ldconfig")
cmake_script.cmd(
"python3 ../tensorrt_llm/scripts/build_wheel.py --trt_root /usr/local/tensorrt"
)
cargs = [
cmake_backend_arg(
"tensorrtllm",
@@ -830,7 +834,7 @@ def tensorrtllm_cmake_args(images):
"tensorrtllm", "TRT_INCLUDE_DIR", None, "${TRT_ROOT}/include"
),
]
cargs.append(cmake_backend_enable("tensorrtllm", "TRITON_BUILD", True))
cargs.append(cmake_backend_enable("tensorrtllm", "USE_CXX11_ABI", True))
return cargs
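Reviewer note: cmake_backend_arg and cmake_backend_enable are not shown in this diff. A minimal sketch of what they plausibly emit, assuming each returns a per-backend -D definition for the CMake invocation (the signatures match the calls above; the bodies are guesses, not build.py's actual implementation):

def cmake_backend_arg(backend, name, arg_type, value):
    # Hypothetical: backend selects whose flag list this joins (unused here);
    # arg_type, when given, becomes a CMake cache-variable type suffix.
    type_suffix = f":{arg_type}" if arg_type else ""
    return f'-D{name}{type_suffix}="{value}"'

def cmake_backend_enable(backend, name, enabled):
    # Under this assumption, cmake_backend_enable("tensorrtllm", "USE_CXX11_ABI", True)
    # would yield '-DUSE_CXX11_ABI:BOOL=ON'.
    return f"-D{name}:BOOL={'ON' if enabled else 'OFF'}"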


@@ -1093,6 +1097,8 @@ def create_dockerfile_linux(
if "tensorrtllm" in backends:
df += """
# Remove TRT contents that are not needed in runtime
RUN apt-get update && apt-get install -y libcudnn8-dev && ldconfig
RUN ARCH="$(uname -i)" \\
&& rm -fr ${TRT_ROOT}/bin ${TRT_ROOT}/targets/${ARCH}-linux-gnu/bin ${TRT_ROOT}/data \\
&& rm -fr ${TRT_ROOT}/doc ${TRT_ROOT}/onnx_graphsurgeon ${TRT_ROOT}/python \\
@@ -1102,17 +1108,17 @@ def create_dockerfile_linux(
RUN python3 -m pip install --upgrade pip \\
    && pip3 install transformers
# Uninstall unused nvidia packages
RUN if pip freeze | grep -q "nvidia.*"; then \\
        pip freeze | grep "nvidia.*" | xargs pip uninstall -y; \\
    fi
RUN pip cache purge
# Drop the static libs
RUN ARCH="$(uname -i)" \\
    && rm -f ${TRT_ROOT}/targets/${ARCH}-linux-gnu/lib/libnvinfer*.a \\
        ${TRT_ROOT}/targets/${ARCH}-linux-gnu/lib/libnvonnxparser_*.a
+# Install TensorRT-LLM
+RUN python3 -m pip install /opt/tritonserver/backends/tensorrtllm/tensorrt_llm-*.whl -U --pre --extra-index-url https://pypi.nvidia.com \\
+    && rm -fv /opt/tritonserver/backends/tensorrtllm/tensorrt_llm-*.whl
+RUN find /usr -name libtensorrt_llm.so -exec dirname {} \; > /etc/ld.so.conf.d/tensorrt-llm.conf
+RUN find /opt/tritonserver -name libtritonserver.so -exec dirname {} \; > /etc/ld.so.conf.d/triton-tensorrtllm-worker.conf
+ENV LD_LIBRARY_PATH=/usr/local/tensorrt/lib/:/opt/tritonserver/backends/tensorrtllm:$LD_LIBRARY_PATH
"""
with open(os.path.join(ddir, dockerfile_name), "w") as dfile:
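Reviewer note: the two RUN find lines in the fragment above register the directories holding libtensorrt_llm.so and libtritonserver.so with the dynamic loader via /etc/ld.so.conf.d, so the worker can resolve them without relying solely on LD_LIBRARY_PATH. A rough Python equivalent of one such step, assuming root inside the image (register_lib_dir is illustrative, not part of the build):

import os

def register_lib_dir(search_root, lib_name, conf_name):
    # Walk search_root for lib_name and record its directory in an
    # ld.so.conf.d fragment, mirroring "find ... -exec dirname {} \;".
    for dirpath, _dirs, files in os.walk(search_root):
        if lib_name in files:
            with open(os.path.join("/etc/ld.so.conf.d", conf_name), "w") as f:
                f.write(dirpath + "\n")
            return dirpath
    return None

# register_lib_dir("/usr", "libtensorrt_llm.so", "tensorrt-llm.conf")
# then run `ldconfig` so the loader cache picks the directory up.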
@@ -1708,6 +1714,30 @@ def tensorrtllm_prebuild(cmake_script):
    # Export the TRT_ROOT environment variable
    cmake_script.cmd("export TRT_ROOT=/usr/local/tensorrt")
    cmake_script.cmd("export ARCH=$(uname -m)")
+    cmake_script.cmd(
+        'export LD_LIBRARY_PATH="/usr/local/cuda/compat/lib.real:${LD_LIBRARY_PATH}"'
+    )
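Reviewer note: cmake_script is build.py's generated-script object; cmd, cp, and mkdir append shell lines to a build script that runs later, which is why plain export statements like the ones above take effect for subsequent commands. A minimal sketch of that mechanism, assuming simple line accumulation (the real class in build.py handles considerably more):

class BuildScript:
    # Hypothetical minimal stand-in for the object behind cmake_script.
    def __init__(self, path):
        self._path = path
        self._lines = ["#!/usr/bin/env bash", "set -ex"]

    def cmd(self, command):
        # Commands are emitted verbatim, so exports shape later steps.
        self._lines.append(command)

    def mkdir(self, directory):
        self._lines.append(f'mkdir -p "{directory}"')

    def cp(self, src, dest):
        # src is left unquoted so the shell can expand globs like *.whl.
        self._lines.append(f'cp {src} "{dest}"')

    def write(self):
        with open(self._path, "w") as f:
            f.write("\n".join(self._lines) + "\n")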


+def tensorrtllm_postbuild(cmake_script, repo_install_dir, tensorrtllm_be_dir):
+    # TODO: Update the CMakeLists.txt of TRT-LLM backend to install the artifacts to the correct location
+    cmake_destination_dir = os.path.join(repo_install_dir, "backends/tensorrtllm")
+    cmake_script.mkdir(cmake_destination_dir)
+    # Copy over the TRT-LLM wheel for later installation
+    cmake_script.cp(
+        os.path.join(tensorrtllm_be_dir, "tensorrt_llm", "build", "tensorrt_llm-*.whl"),
+        cmake_destination_dir,
+    )
+
+    # Copy over the TRT-LLM backend libraries
+    cmake_script.cp(
+        os.path.join(tensorrtllm_be_dir, "build", "libtriton_tensorrtllm*.so"),
+        cmake_destination_dir,
+    )
+    cmake_script.cp(
+        os.path.join(tensorrtllm_be_dir, "build", "triton_tensorrtllm_worker"),
+        cmake_destination_dir,
+    )


def backend_build(
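Reviewer note: since cmake_script.cp presumably emits a shell cp, glob patterns such as tensorrt_llm-*.whl are expanded at build time, and the staged wheel is then installed by the runtime Dockerfile fragment above. A plain-Python sketch of the same staging under that assumption (stage_tensorrtllm is illustrative, not part of the diff):

import glob
import os
import shutil

def stage_tensorrtllm(repo_install_dir, tensorrtllm_be_dir):
    # Mirror the three cmake_script.cp calls with explicit glob expansion.
    dest = os.path.join(repo_install_dir, "backends", "tensorrtllm")
    os.makedirs(dest, exist_ok=True)
    patterns = [
        os.path.join(tensorrtllm_be_dir, "tensorrt_llm", "build", "tensorrt_llm-*.whl"),
        os.path.join(tensorrtllm_be_dir, "build", "libtriton_tensorrtllm*.so"),
        os.path.join(tensorrtllm_be_dir, "build", "triton_tensorrtllm_worker"),
    ]
    for pattern in patterns:
        for path in glob.glob(pattern):
            shutil.copy2(path, dest)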
@@ -1742,6 +1772,10 @@ def backend_build(
    )
    cmake_script.makeinstall()

+    if be == "tensorrtllm":
+        tensorrtllm_be_dir = os.path.join(build_dir, be)
+        tensorrtllm_postbuild(cmake_script, repo_install_dir, tensorrtllm_be_dir)
+
    cmake_script.mkdir(os.path.join(install_dir, "backends"))
    cmake_script.rmdir(os.path.join(install_dir, "backends", be))
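Reviewer note: hooking the TRT-LLM staging in right after makeinstall keeps the backend-specific step out of the generic install flow. If more backends ever need post-install work, the same idea generalizes to a table-driven dispatch; a hedged sketch (POSTBUILD_HOOKS and run_postbuild are hypothetical, and assume tensorrtllm_postbuild from this diff is in scope):

import os

POSTBUILD_HOOKS = {
    "tensorrtllm": tensorrtllm_postbuild,  # defined in this diff
}

def run_postbuild(be, cmake_script, build_dir, repo_install_dir):
    # Look up and run the backend's post-install hook, if it has one.
    hook = POSTBUILD_HOOKS.get(be)
    if hook is not None:
        hook(cmake_script, repo_install_dir, os.path.join(build_dir, be))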

