From 2789103009e794d4fde437c6d068e1ad221f9432 Mon Sep 17 00:00:00 2001 From: krishung5 Date: Fri, 19 Apr 2024 09:55:38 -0700 Subject: [PATCH 1/7] TRT-LLM build --- build.py | 44 +++++++++++++++++++++++++++++++++++++++----- 1 file changed, 39 insertions(+), 5 deletions(-) diff --git a/build.py b/build.py index ce691d5420..04377a848e 100755 --- a/build.py +++ b/build.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -# Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions @@ -69,10 +69,10 @@ # incorrectly load the other version of the openvino libraries. # TRITON_VERSION_MAP = { - "2.45.0": ( - "24.04", # triton container - "24.04", # upstream container - "1.17.3", # ORT + "2.46.0dev": ( + "24.05dev", # triton container + "24.03", # upstream container + "1.17.2", # ORT "2023.3.0", # ORT OpenVINO "2023.3.0", # Standalone OpenVINO "3.2.6", # DCGM version @@ -1113,6 +1113,12 @@ def create_dockerfile_linux( && rm -f ${TRT_ROOT}/targets/${ARCH}-linux-gnu/lib/libnvinfer*.a \\ ${TRT_ROOT}/targets/${ARCH}-linux-gnu/lib/libnvonnxparser_*.a +# Install TensorRT-LLM +RUN python3 -m pip install /opt/tritonserver/backends/tensorrtllm/tensorrt_llm-*.whl -U --pre --extra-index-url https://pypi.nvidia.com \\ + && rm -fv /opt/tritonserver/backends/tensorrtllm/tensorrt_llm-*.whl +RUN find /usr -name libtensorrt_llm.so -exec dirname {} \; > /etc/ld.so.conf.d/tensorrt-llm.conf +RUN find /opt/tritonserver -name libtritonserver.so -exec dirname {} \; > /etc/ld.so.conf.d/triton-tensorrtllm-worker.conf + ENV LD_LIBRARY_PATH=/usr/local/tensorrt/lib/:/opt/tritonserver/backends/tensorrtllm:$LD_LIBRARY_PATH """ with open(os.path.join(ddir, dockerfile_name), "w") as dfile: @@ -1708,6 +1714,30 @@ def tensorrtllm_prebuild(cmake_script): # Export the TRT_ROOT environment 
variable cmake_script.cmd("export TRT_ROOT=/usr/local/tensorrt") cmake_script.cmd("export ARCH=$(uname -m)") + cmake_script.cmd( + 'export LD_LIBRARY_PATH="/usr/local/cuda/compat/lib.real:${LD_LIBRARY_PATH}"' + ) + + +def tensorrtllm_postbuild(cmake_script, repo_install_dir, tensorrtllm_be_dir): + # TODO: Update the CMakeLists.txt of TRT-LLM backend to install the artifacts to the correct location + cmake_destination_dir = os.path.join(repo_install_dir, "backends/tensorrtllm") + cmake_script.mkdir(cmake_destination_dir) + # Copy over the TRT-LLM wheel for later installation + cmake_script.cp( + os.path.join(tensorrtllm_be_dir, "tensorrt_llm", "build", "tensorrt_llm-*.whl"), + cmake_destination_dir, + ) + + # Copy over the TRT-LLM backend libraries + cmake_script.cp( + os.path.join(tensorrtllm_be_dir, "build", "libtriton_tensorrtllm*.so"), + cmake_destination_dir, + ) + cmake_script.cp( + os.path.join(tensorrtllm_be_dir, "build", "triton_tensorrtllm_worker"), + cmake_destination_dir, + ) def backend_build( @@ -1742,6 +1772,10 @@ def backend_build( ) cmake_script.makeinstall() + if be == "tensorrtllm": + tensorrtllm_be_dir = os.path.join(build_dir, be) + tensorrtllm_postbuild(cmake_script, repo_install_dir, tensorrtllm_be_dir) + cmake_script.mkdir(os.path.join(install_dir, "backends")) cmake_script.rmdir(os.path.join(install_dir, "backends", be)) From fd69a6716d51ff9b3ac16dabab4ab7759ea696e0 Mon Sep 17 00:00:00 2001 From: krishung5 Date: Fri, 19 Apr 2024 09:57:47 -0700 Subject: [PATCH 2/7] Update versions --- build.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/build.py b/build.py index 04377a848e..325e63d4f7 100755 --- a/build.py +++ b/build.py @@ -69,10 +69,10 @@ # incorrectly load the other version of the openvino libraries. 
# TRITON_VERSION_MAP = { - "2.46.0dev": ( - "24.05dev", # triton container - "24.03", # upstream container - "1.17.2", # ORT + "2.45.0": ( + "24.04", # triton container + "24.04", # upstream container + "1.17.3", # ORT "2023.3.0", # ORT OpenVINO "2023.3.0", # Standalone OpenVINO "3.2.6", # DCGM version From 51bee39749fd2cf93c98723b17472a3d10f5ff99 Mon Sep 17 00:00:00 2001 From: Misha Chornyi Date: Fri, 19 Apr 2024 15:16:40 -0700 Subject: [PATCH 3/7] Remove statement, as unused --- build.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/build.py b/build.py index 325e63d4f7..a3b80e491c 100755 --- a/build.py +++ b/build.py @@ -1103,9 +1103,6 @@ def create_dockerfile_linux( && pip3 install transformers # Uninstall unused nvidia packages -RUN if pip freeze | grep -q "nvidia.*"; then \\ - pip freeze | grep "nvidia.*" | xargs pip uninstall -y; \\ - fi RUN pip cache purge # Drop the static libs From 36baf3ee3c62a03ec10e73380853f89f8c36811b Mon Sep 17 00:00:00 2001 From: Misha Chornyi Date: Fri, 19 Apr 2024 17:06:09 -0700 Subject: [PATCH 4/7] Remove cache --- build.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/build.py b/build.py index a3b80e491c..e33dde33ea 100755 --- a/build.py +++ b/build.py @@ -1102,9 +1102,6 @@ def create_dockerfile_linux( RUN python3 -m pip install --upgrade pip \\ && pip3 install transformers -# Uninstall unused nvidia packages -RUN pip cache purge - # Drop the static libs RUN ARCH="$(uname -i)" \\ && rm -f ${TRT_ROOT}/targets/${ARCH}-linux-gnu/lib/libnvinfer*.a \\ From 4f6ad97ad84eba50300a049b0dfcfe47225a77ac Mon Sep 17 00:00:00 2001 From: Misha Chornyi Date: Mon, 22 Apr 2024 10:34:53 -0700 Subject: [PATCH 5/7] add cmake option to set CXX11 ABI --- build.py | 1 + 1 file changed, 1 insertion(+) diff --git a/build.py b/build.py index e33dde33ea..6e21828948 100755 --- a/build.py +++ b/build.py @@ -831,6 +831,7 @@ def tensorrtllm_cmake_args(images): ), ] cargs.append(cmake_backend_enable("tensorrtllm", "TRITON_BUILD", True)) + 
cargs.append(cmake_backend_enable("tensorrtllm", "USE_CXX11_ABI", True)) return cargs From 35595d9ded9661ce0b0a3f0b8e437b427aa2706a Mon Sep 17 00:00:00 2001 From: Misha Chornyi <99709299+mc-nv@users.noreply.github.com> Date: Mon, 22 Apr 2024 19:02:18 -0700 Subject: [PATCH 6/7] Mchornyi krish 24.04 (#7149) * Enable TensorRT-LLM build outside of CMake * TensorRT-LLM requires lower version of cuDNN --- build.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/build.py b/build.py index 6e21828948..ac50c9d1f4 100755 --- a/build.py +++ b/build.py @@ -819,6 +819,8 @@ def fastertransformer_cmake_args(): def tensorrtllm_cmake_args(images): + cmake_script.cmd("apt-get update && apt-get install -y libcudnn8-dev && ldconfig") + cmake_script.cmd("python3 ../tensorrt_llm/scripts/build_wheel.py --trt_root /usr/local/tensorrt") cargs = [ cmake_backend_arg( "tensorrtllm", @@ -830,7 +832,6 @@ def tensorrtllm_cmake_args(images): "tensorrtllm", "TRT_INCLUDE_DIR", None, "${TRT_ROOT}/include" ), ] - cargs.append(cmake_backend_enable("tensorrtllm", "TRITON_BUILD", True)) cargs.append(cmake_backend_enable("tensorrtllm", "USE_CXX11_ABI", True)) return cargs @@ -1094,6 +1095,8 @@ def create_dockerfile_linux( if "tensorrtllm" in backends: df += """ # Remove TRT contents that are not needed in runtime +RUN apt-get update && apt-get install -y libcudnn8-dev && ldconfig + RUN ARCH="$(uname -i)" \\ && rm -fr ${TRT_ROOT}/bin ${TRT_ROOT}/targets/${ARCH}-linux-gnu/bin ${TRT_ROOT}/data \\ && rm -fr ${TRT_ROOT}/doc ${TRT_ROOT}/onnx_graphsurgeon ${TRT_ROOT}/python \\ From 37800d0ccd8289e6b48f57bb07c63ef3c3757730 Mon Sep 17 00:00:00 2001 From: krishung5 Date: Mon, 22 Apr 2024 19:08:13 -0700 Subject: [PATCH 7/7] Format --- build.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/build.py b/build.py index ac50c9d1f4..4e012f2011 100755 --- a/build.py +++ b/build.py @@ -820,7 +820,9 @@ def fastertransformer_cmake_args(): def tensorrtllm_cmake_args(images): 
cmake_script.cmd("apt-get update && apt-get install -y libcudnn8-dev && ldconfig") - cmake_script.cmd("python3 ../tensorrt_llm/scripts/build_wheel.py --trt_root /usr/local/tensorrt") + cmake_script.cmd( + "python3 ../tensorrt_llm/scripts/build_wheel.py --trt_root /usr/local/tensorrt" + ) cargs = [ cmake_backend_arg( "tensorrtllm",