From 2789103009e794d4fde437c6d068e1ad221f9432 Mon Sep 17 00:00:00 2001
From: krishung5 <krish@nvidia.com>
Date: Fri, 19 Apr 2024 09:55:38 -0700
Subject: [PATCH 1/7] TRT-LLM build

---
 build.py | 44 +++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 39 insertions(+), 5 deletions(-)

diff --git a/build.py b/build.py
index ce691d5420..04377a848e 100755
--- a/build.py
+++ b/build.py
@@ -1,5 +1,5 @@
 #!/usr/bin/env python3
-# Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -69,10 +69,10 @@
 # incorrectly load the other version of the openvino libraries.
 #
 TRITON_VERSION_MAP = {
-    "2.45.0": (
-        "24.04",  # triton container
-        "24.04",  # upstream container
-        "1.17.3",  # ORT
+    "2.46.0dev": (
+        "24.05dev",  # triton container
+        "24.03",  # upstream container
+        "1.17.2",  # ORT
         "2023.3.0",  # ORT OpenVINO
         "2023.3.0",  # Standalone OpenVINO
         "3.2.6",  # DCGM version
@@ -1113,6 +1113,12 @@ def create_dockerfile_linux(
       && rm -f ${TRT_ROOT}/targets/${ARCH}-linux-gnu/lib/libnvinfer*.a \\
           ${TRT_ROOT}/targets/${ARCH}-linux-gnu/lib/libnvonnxparser_*.a
 
+# Install TensorRT-LLM
+RUN python3 -m pip install /opt/tritonserver/backends/tensorrtllm/tensorrt_llm-*.whl -U --pre --extra-index-url https://pypi.nvidia.com \\
+        && rm -fv /opt/tritonserver/backends/tensorrtllm/tensorrt_llm-*.whl
+RUN find /usr -name libtensorrt_llm.so -exec dirname {} \; > /etc/ld.so.conf.d/tensorrt-llm.conf
+RUN find /opt/tritonserver -name libtritonserver.so -exec dirname {} \; > /etc/ld.so.conf.d/triton-tensorrtllm-worker.conf
+
 ENV LD_LIBRARY_PATH=/usr/local/tensorrt/lib/:/opt/tritonserver/backends/tensorrtllm:$LD_LIBRARY_PATH
 """
     with open(os.path.join(ddir, dockerfile_name), "w") as dfile:
@@ -1708,6 +1714,30 @@ def tensorrtllm_prebuild(cmake_script):
     # Export the TRT_ROOT environment variable
     cmake_script.cmd("export TRT_ROOT=/usr/local/tensorrt")
     cmake_script.cmd("export ARCH=$(uname -m)")
+    cmake_script.cmd(
+        'export LD_LIBRARY_PATH="/usr/local/cuda/compat/lib.real:${LD_LIBRARY_PATH}"'
+    )
+
+
+def tensorrtllm_postbuild(cmake_script, repo_install_dir, tensorrtllm_be_dir):
+    # TODO: Update the TRT-LLM backend's CMakeLists.txt to install these artifacts itself, so this manual staging into backends/tensorrtllm can be dropped
+    cmake_destination_dir = os.path.join(repo_install_dir, "backends/tensorrtllm")
+    cmake_script.mkdir(cmake_destination_dir)
+    # Copy over the TRT-LLM wheel for later installation (the "*" is passed through literally; presumably expanded when the generated script runs - TODO confirm)
+    cmake_script.cp(
+        os.path.join(tensorrtllm_be_dir, "tensorrt_llm", "build", "tensorrt_llm-*.whl"),
+        cmake_destination_dir,
+    )
+
+    # Copy over the TRT-LLM backend libraries (libtriton_tensorrtllm*.so) and the triton_tensorrtllm_worker artifact
+    cmake_script.cp(
+        os.path.join(tensorrtllm_be_dir, "build", "libtriton_tensorrtllm*.so"),
+        cmake_destination_dir,
+    )
+    cmake_script.cp(
+        os.path.join(tensorrtllm_be_dir, "build", "triton_tensorrtllm_worker"),
+        cmake_destination_dir,
+    )
 
 
 def backend_build(
@@ -1742,6 +1772,10 @@ def backend_build(
     )
     cmake_script.makeinstall()
 
+    if be == "tensorrtllm":
+        tensorrtllm_be_dir = os.path.join(build_dir, be)
+        tensorrtllm_postbuild(cmake_script, repo_install_dir, tensorrtllm_be_dir)
+
     cmake_script.mkdir(os.path.join(install_dir, "backends"))
     cmake_script.rmdir(os.path.join(install_dir, "backends", be))
 

From fd69a6716d51ff9b3ac16dabab4ab7759ea696e0 Mon Sep 17 00:00:00 2001
From: krishung5 <krish@nvidia.com>
Date: Fri, 19 Apr 2024 09:57:47 -0700
Subject: [PATCH 2/7] Update versions

---
 build.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/build.py b/build.py
index 04377a848e..325e63d4f7 100755
--- a/build.py
+++ b/build.py
@@ -69,10 +69,10 @@
 # incorrectly load the other version of the openvino libraries.
 #
 TRITON_VERSION_MAP = {
-    "2.46.0dev": (
-        "24.05dev",  # triton container
-        "24.03",  # upstream container
-        "1.17.2",  # ORT
+    "2.45.0": (
+        "24.04",  # triton container
+        "24.04",  # upstream container
+        "1.17.3",  # ORT
         "2023.3.0",  # ORT OpenVINO
         "2023.3.0",  # Standalone OpenVINO
         "3.2.6",  # DCGM version

From 51bee39749fd2cf93c98723b17472a3d10f5ff99 Mon Sep 17 00:00:00 2001
From: Misha Chornyi <mchornyi@nvidia.com>
Date: Fri, 19 Apr 2024 15:16:40 -0700
Subject: [PATCH 3/7] Remove statement, as unused

---
 build.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/build.py b/build.py
index 325e63d4f7..a3b80e491c 100755
--- a/build.py
+++ b/build.py
@@ -1103,9 +1103,6 @@ def create_dockerfile_linux(
       && pip3 install transformers
 
 # Uninstall unused nvidia packages
-RUN if pip freeze | grep -q "nvidia.*"; then \\
-        pip freeze | grep "nvidia.*" | xargs pip uninstall -y; \\
-    fi
 RUN pip cache purge
 
 # Drop the static libs

From 36baf3ee3c62a03ec10e73380853f89f8c36811b Mon Sep 17 00:00:00 2001
From: Misha Chornyi <mchornyi@nvidia.com>
Date: Fri, 19 Apr 2024 17:06:09 -0700
Subject: [PATCH 4/7] Remove cache

---
 build.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/build.py b/build.py
index a3b80e491c..e33dde33ea 100755
--- a/build.py
+++ b/build.py
@@ -1102,9 +1102,6 @@ def create_dockerfile_linux(
 RUN python3 -m pip install --upgrade pip \\
       && pip3 install transformers
 
-# Uninstall unused nvidia packages
-RUN pip cache purge
-
 # Drop the static libs
 RUN ARCH="$(uname -i)" \\
       && rm -f ${TRT_ROOT}/targets/${ARCH}-linux-gnu/lib/libnvinfer*.a \\

From 4f6ad97ad84eba50300a049b0dfcfe47225a77ac Mon Sep 17 00:00:00 2001
From: Misha Chornyi <mchornyi@nvidia.com>
Date: Mon, 22 Apr 2024 10:34:53 -0700
Subject: [PATCH 5/7] add cmake option to set CXX11 ABI

---
 build.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/build.py b/build.py
index e33dde33ea..6e21828948 100755
--- a/build.py
+++ b/build.py
@@ -831,6 +831,7 @@ def tensorrtllm_cmake_args(images):
         ),
     ]
     cargs.append(cmake_backend_enable("tensorrtllm", "TRITON_BUILD", True))
+    cargs.append(cmake_backend_enable("tensorrtllm", "USE_CXX11_ABI", True))
     return cargs
 
 

From 35595d9ded9661ce0b0a3f0b8e437b427aa2706a Mon Sep 17 00:00:00 2001
From: Misha Chornyi <99709299+mc-nv@users.noreply.github.com>
Date: Mon, 22 Apr 2024 19:02:18 -0700
Subject: [PATCH 6/7] Mchornyi krish 24.04 (#7149)

* Enable TensorRT-LLM build outside of CMake

* TensorRT-LLM requires lower version of cuDNN
---
 build.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/build.py b/build.py
index 6e21828948..ac50c9d1f4 100755
--- a/build.py
+++ b/build.py
@@ -819,6 +819,8 @@ def fastertransformer_cmake_args():
 
 
 def tensorrtllm_cmake_args(images):
+    cmake_script.cmd("apt-get update && apt-get install -y libcudnn8-dev && ldconfig")
+    cmake_script.cmd("python3 ../tensorrt_llm/scripts/build_wheel.py --trt_root /usr/local/tensorrt")
     cargs = [
         cmake_backend_arg(
             "tensorrtllm",
@@ -830,7 +832,6 @@ def tensorrtllm_cmake_args(images):
             "tensorrtllm", "TRT_INCLUDE_DIR", None, "${TRT_ROOT}/include"
         ),
     ]
-    cargs.append(cmake_backend_enable("tensorrtllm", "TRITON_BUILD", True))
     cargs.append(cmake_backend_enable("tensorrtllm", "USE_CXX11_ABI", True))
     return cargs
 
@@ -1094,6 +1095,8 @@ def create_dockerfile_linux(
     if "tensorrtllm" in backends:
         df += """
 # Remove TRT contents that are not needed in runtime
+RUN apt-get update && apt-get install -y libcudnn8-dev && ldconfig
+
 RUN ARCH="$(uname -i)" \\
       && rm -fr ${TRT_ROOT}/bin ${TRT_ROOT}/targets/${ARCH}-linux-gnu/bin ${TRT_ROOT}/data \\
       && rm -fr  ${TRT_ROOT}/doc ${TRT_ROOT}/onnx_graphsurgeon ${TRT_ROOT}/python \\

From 37800d0ccd8289e6b48f57bb07c63ef3c3757730 Mon Sep 17 00:00:00 2001
From: krishung5 <krish@nvidia.com>
Date: Mon, 22 Apr 2024 19:08:13 -0700
Subject: [PATCH 7/7] Format

---
 build.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/build.py b/build.py
index ac50c9d1f4..4e012f2011 100755
--- a/build.py
+++ b/build.py
@@ -820,7 +820,9 @@ def fastertransformer_cmake_args():
 
 def tensorrtllm_cmake_args(images):
     cmake_script.cmd("apt-get update && apt-get install -y libcudnn8-dev && ldconfig")
-    cmake_script.cmd("python3 ../tensorrt_llm/scripts/build_wheel.py --trt_root /usr/local/tensorrt")
+    cmake_script.cmd(
+        "python3 ../tensorrt_llm/scripts/build_wheel.py --trt_root /usr/local/tensorrt"
+    )
     cargs = [
         cmake_backend_arg(
             "tensorrtllm",