Fix TensorRT-LLM #7142

Merged — 7 commits, Apr 23, 2024

Changes from all commits
build.py — 50 changes: 42 additions & 8 deletions
@@ -1,5 +1,5 @@
#!/usr/bin/env python3
# Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
@@ -819,6 +819,10 @@ def fastertransformer_cmake_args():


def tensorrtllm_cmake_args(images):
cmake_script.cmd("apt-get update && apt-get install -y libcudnn8-dev && ldconfig")
cmake_script.cmd(
"python3 ../tensorrt_llm/scripts/build_wheel.py --trt_root /usr/local/tensorrt"
)
cargs = [
cmake_backend_arg(
"tensorrtllm",
@@ -830,7 +834,7 @@ def tensorrtllm_cmake_args(images):
"tensorrtllm", "TRT_INCLUDE_DIR", None, "${TRT_ROOT}/include"
),
]
cargs.append(cmake_backend_enable("tensorrtllm", "TRITON_BUILD", True))
cargs.append(cmake_backend_enable("tensorrtllm", "USE_CXX11_ABI", True))
return cargs


@@ -1093,6 +1097,8 @@ def create_dockerfile_linux(
if "tensorrtllm" in backends:
df += """
# Remove TRT contents that are not needed in runtime
RUN apt-get update && apt-get install -y libcudnn8-dev && ldconfig

RUN ARCH="$(uname -i)" \\
&& rm -fr ${TRT_ROOT}/bin ${TRT_ROOT}/targets/${ARCH}-linux-gnu/bin ${TRT_ROOT}/data \\
&& rm -fr ${TRT_ROOT}/doc ${TRT_ROOT}/onnx_graphsurgeon ${TRT_ROOT}/python \\
@@ -1102,17 +1108,17 @@
RUN python3 -m pip install --upgrade pip \\
&& pip3 install transformers

# Uninstall unused nvidia packages
RUN if pip freeze | grep -q "nvidia.*"; then \\
pip freeze | grep "nvidia.*" | xargs pip uninstall -y; \\
fi
RUN pip cache purge

# Drop the static libs
RUN ARCH="$(uname -i)" \\
&& rm -f ${TRT_ROOT}/targets/${ARCH}-linux-gnu/lib/libnvinfer*.a \\
${TRT_ROOT}/targets/${ARCH}-linux-gnu/lib/libnvonnxparser_*.a

# Install TensorRT-LLM
RUN python3 -m pip install /opt/tritonserver/backends/tensorrtllm/tensorrt_llm-*.whl -U --pre --extra-index-url https://pypi.nvidia.com \\
&& rm -fv /opt/tritonserver/backends/tensorrtllm/tensorrt_llm-*.whl
RUN find /usr -name libtensorrt_llm.so -exec dirname {} \; > /etc/ld.so.conf.d/tensorrt-llm.conf
RUN find /opt/tritonserver -name libtritonserver.so -exec dirname {} \; > /etc/ld.so.conf.d/triton-tensorrtllm-worker.conf

ENV LD_LIBRARY_PATH=/usr/local/tensorrt/lib/:/opt/tritonserver/backends/tensorrtllm:$LD_LIBRARY_PATH
"""
with open(os.path.join(ddir, dockerfile_name), "w") as dfile:
@@ -1708,6 +1714,30 @@ def tensorrtllm_prebuild(cmake_script):
# Export the TRT_ROOT environment variable
cmake_script.cmd("export TRT_ROOT=/usr/local/tensorrt")
cmake_script.cmd("export ARCH=$(uname -m)")
cmake_script.cmd(
'export LD_LIBRARY_PATH="/usr/local/cuda/compat/lib.real:${LD_LIBRARY_PATH}"'
)

Review thread on the LD_LIBRARY_PATH export above:

Member: Why is this line required?

Contributor (author): Because libcuda.so.1 can't be resolved until --runtime=nvidia is passed. It's a workaround that allows us to refer to the links in the build container; it can be deprecated in the future. You can create a task and assign it to me.
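As context for the reply above, here is a minimal, hypothetical check (not part of this PR; the helper name is illustrative) of whether libcuda.so.1 resolves in the current environment. In the build container, where --runtime=nvidia is not in effect, this typically fails unless the CUDA compat directory is on the library path:

# Illustrative sketch only, not code from build.py.
import ctypes

def libcuda_resolves() -> bool:
    # The dynamic loader searches LD_LIBRARY_PATH (as set for this process),
    # ld.so.conf.d entries, and the default paths for libcuda.so.1.
    try:
        ctypes.CDLL("libcuda.so.1")
        return True
    except OSError:
        return False

if __name__ == "__main__":
    print("libcuda.so.1 resolvable:", libcuda_resolves())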


def tensorrtllm_postbuild(cmake_script, repo_install_dir, tensorrtllm_be_dir):
# TODO: Update the CMakeLists.txt of TRT-LLM backend to install the artifacts to the correct location
cmake_destination_dir = os.path.join(repo_install_dir, "backends/tensorrtllm")
cmake_script.mkdir(cmake_destination_dir)
# Copy over the TRT-LLM wheel for later installation
cmake_script.cp(
os.path.join(tensorrtllm_be_dir, "tensorrt_llm", "build", "tensorrt_llm-*.whl"),
cmake_destination_dir,
)

# Copy over the TRT-LLM backend libraries
cmake_script.cp(
os.path.join(tensorrtllm_be_dir, "build", "libtriton_tensorrtllm*.so"),
cmake_destination_dir,
)
cmake_script.cp(
os.path.join(tensorrtllm_be_dir, "build", "triton_tensorrtllm_worker"),
cmake_destination_dir,
)


def backend_build(
@@ -1742,6 +1772,10 @@ def backend_build(
)
cmake_script.makeinstall()

if be == "tensorrtllm":
tensorrtllm_be_dir = os.path.join(build_dir, be)
tensorrtllm_postbuild(cmake_script, repo_install_dir, tensorrtllm_be_dir)

cmake_script.mkdir(os.path.join(install_dir, "backends"))
cmake_script.rmdir(os.path.join(install_dir, "backends", be))
