From 91048f90605f527f732861a956aeb7d0acff5441 Mon Sep 17 00:00:00 2001
From: krishung5 <krish@nvidia.com>
Date: Mon, 9 Oct 2023 18:35:09 -0700
Subject: [PATCH 01/12] Update url

---
 build.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/build.py b/build.py
index a811cd3123..92621f59bd 100755
--- a/build.py
+++ b/build.py
@@ -1319,7 +1319,7 @@ def dockerfile_prepare_container_linux(argmap, backends, enable_gpu, target_mach
     if "tensorrtllm" in backends:
         be = "tensorrtllm"
         # FIXME: Update the url
-        url = "https://gitlab-master.nvidia.com/krish/tensorrtllm_backend/-/raw/{}/tools/gen_trtllm_dockerfile.py".format(
+        url = "https://gitlab-master.nvidia.com/krish/tekit_backend/-/raw/{}/tools/gen_trtllm_dockerfile.py".format(
             backends[be]
         )
 
@@ -1821,7 +1821,7 @@ def backend_build(
     # FIXME: Use GitHub repo
     if be == "tensorrtllm":
         cmake_script.gitclone(
-            backend_repo(be), tag, be, "https://gitlab-master.nvidia.com/krish"
+            backend_repo("tekit"), tag, be, "https://gitlab-master.nvidia.com/krish"
         )
     else:
         cmake_script.gitclone(backend_repo(be), tag, be, github_organization)

From a12d8a194d14e594b1ce11ebbc05c61431785c1f Mon Sep 17 00:00:00 2001
From: krishung5 <krish@nvidia.com>
Date: Mon, 9 Oct 2023 18:40:54 -0700
Subject: [PATCH 02/12] Debugging

---
 build.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/build.py b/build.py
index 92621f59bd..043c06c206 100755
--- a/build.py
+++ b/build.py
@@ -1319,9 +1319,11 @@ def dockerfile_prepare_container_linux(argmap, backends, enable_gpu, target_mach
     if "tensorrtllm" in backends:
         be = "tensorrtllm"
         # FIXME: Update the url
-        url = "https://gitlab-master.nvidia.com/krish/tekit_backend/-/raw/{}/tools/gen_trtllm_dockerfile.py".format(
-            backends[be]
-        )
+        # url = "https://gitlab-master.nvidia.com/krish/tekit_backend/-/raw/{}/tools/gen_trtllm_dockerfile.py".format(
+        #     backends[be]
+        # )
+        print("trtllm tag:", backends[be])
+        url = "https://gitlab-master.nvidia.com/krish/tekit_backend/-/raw/krish-triton-changes/tools/gen_trtllm_dockerfile.py"
 
         response = requests.get(url)
         spec = importlib.util.spec_from_loader(

From a244bca338f677dc46b4bf9fbf5c801980b186d3 Mon Sep 17 00:00:00 2001
From: krishung5 <krish@nvidia.com>
Date: Mon, 9 Oct 2023 18:42:08 -0700
Subject: [PATCH 03/12] Debugging

---
 build.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/build.py b/build.py
index 043c06c206..0d9f67fcdb 100755
--- a/build.py
+++ b/build.py
@@ -1323,7 +1323,7 @@ def dockerfile_prepare_container_linux(argmap, backends, enable_gpu, target_mach
         #     backends[be]
         # )
         print("trtllm tag:", backends[be])
-        url = "https://gitlab-master.nvidia.com/krish/tekit_backend/-/raw/krish-triton-changes/tools/gen_trtllm_dockerfile.py"
+        url = "https://gitlab-master.nvidia.com/krish/tensorrtllm_backend/-/raw/main/tools/gen_trtllm_dockerfile.py"
 
         response = requests.get(url)
         spec = importlib.util.spec_from_loader(

From 562518b875772228341a43346c652ab390e1d155 Mon Sep 17 00:00:00 2001
From: krishung5 <krish@nvidia.com>
Date: Mon, 9 Oct 2023 20:05:30 -0700
Subject: [PATCH 04/12] Update url

---
 build.py | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/build.py b/build.py
index 0d9f67fcdb..de4b03c6be 100755
--- a/build.py
+++ b/build.py
@@ -1319,11 +1319,9 @@ def dockerfile_prepare_container_linux(argmap, backends, enable_gpu, target_mach
     if "tensorrtllm" in backends:
         be = "tensorrtllm"
         # FIXME: Update the url
-        # url = "https://gitlab-master.nvidia.com/krish/tekit_backend/-/raw/{}/tools/gen_trtllm_dockerfile.py".format(
-        #     backends[be]
-        # )
-        print("trtllm tag:", backends[be])
-        url = "https://gitlab-master.nvidia.com/krish/tensorrtllm_backend/-/raw/main/tools/gen_trtllm_dockerfile.py"
+        url = "https://gitlab-master.nvidia.com/krish/tekit_backend/-/raw/{}/tools/gen_trtllm_dockerfile.py".format(
+            backends[be]
+        )
 
         response = requests.get(url)
         spec = importlib.util.spec_from_loader(
@@ -1799,6 +1797,11 @@ def tensorrtllm_prebuild(cmake_script):
     cmake_script.cmd("export TRT_ROOT=/usr/local/tensorrt")
     cmake_script.cmd("export ARCH=$(uname -m)")
 
+    # FIXME: Update the file structure to the one Triton expects. This is a temporary fix
+    # to get the build working for r23.10.
+    # patch inflight_batcher_llm/CMakeLists.txt  < inflight_batcher_llm/CMakeLists.txt.patch
+    cmake_script.cmd("export ARCH=$(uname -m)")
+
 
 def backend_build(
     be,

From 09bb2deb15368a717e65c54e7fa6b60ff7f11a84 Mon Sep 17 00:00:00 2001
From: krishung5 <krish@nvidia.com>
Date: Tue, 10 Oct 2023 14:54:10 -0700
Subject: [PATCH 05/12] Fix build for TRT-LLM backend

---
 build.py | 87 +++++++++++++++++++++++++++++++++++---------------------
 1 file changed, 55 insertions(+), 32 deletions(-)

diff --git a/build.py b/build.py
index de4b03c6be..3c0d03f611 100755
--- a/build.py
+++ b/build.py
@@ -78,7 +78,7 @@
         "2023.0.0",  # Standalone OpenVINO
         "2.4.7",  # DCGM version
         "py310_23.1.0-1",  # Conda version
-        "9.1.0.1",  # TRT version for building TRT-LLM backend
+        "9.1.0.3",  # TRT version for building TRT-LLM backend
         "12.2",  # CUDA version for building TRT-LLM backend
         "0.2.0",  # vLLM version
     )
@@ -884,19 +884,8 @@ def tensorrtllm_cmake_args(images):
             None,
             images["base"],
         ),
-        cmake_backend_arg(
-            "tensorrtllm",
-            "TENSORRT_VERSION",
-            None,
-            TRITON_VERSION_MAP[FLAGS.version][7],
-        ),
-        cmake_backend_arg(
-            "tensorrtllm",
-            "CUDA_VERSION",
-            None,
-            TRITON_VERSION_MAP[FLAGS.version][8],
-        ),
     ]
+    cargs.append(cmake_backend_enable("tensorrtllm", "TRITON_BUILD", True))
     return cargs
 
 
@@ -1315,23 +1304,53 @@ def dockerfile_prepare_container_linux(argmap, backends, enable_gpu, target_mach
     pip3 install --upgrade numpy && \
     rm -rf /var/lib/apt/lists/*
 """
+    # FIXME: Use the postbuild script here
     # Add dependencies needed for tensorrtllm backend
     if "tensorrtllm" in backends:
         be = "tensorrtllm"
-        # FIXME: Update the url
-        url = "https://gitlab-master.nvidia.com/krish/tekit_backend/-/raw/{}/tools/gen_trtllm_dockerfile.py".format(
-            backends[be]
-        )
+        # # FIXME: Update the url
+        # url = "https://gitlab-master.nvidia.com/ftp/tekit_backend/-/raw/{}/tools/gen_trtllm_dockerfile.py".format(
+        #     backends[be]
+        # )
+
+        # response = requests.get(url)
+        # spec = importlib.util.spec_from_loader(
+        #     "trtllm_buildscript", loader=None, origin=url
+        # )
+        # trtllm_buildscript = importlib.util.module_from_spec(spec)
+        # exec(response.content, trtllm_buildscript.__dict__)
+        # df += trtllm_buildscript.create_postbuild(
+        #     backends[be] # repo tag
+        # )
+        df += """
+WORKDIR /workspace
 
-        response = requests.get(url)
-        spec = importlib.util.spec_from_loader(
-            "trtllm_buildscript", loader=None, origin=url
-        )
-        trtllm_buildscript = importlib.util.module_from_spec(spec)
-        exec(response.content, trtllm_buildscript.__dict__)
-        df += trtllm_buildscript.create_postbuild(
-            argmap["TRT_LLM_TRT_VERSION"], argmap["TRT_LLM_CUDA_VERSION"]
-        )
+# Install new version of TRT using the script from TRT-LLM
+RUN apt-get update && apt-get install -y --no-install-recommends python-is-python3
+RUN git clone --single-branch --depth=1 -b {} https://{}:{}@gitlab-master.nvidia.com/ftp/tekit_backend.git tensorrtllm_backend
+RUN cd tensorrtllm_backend && git submodule update --init --recursive
+RUN cp tensorrtllm_backend/tensorrt_llm/docker/common/install_tensorrt.sh /tmp/
+RUN rm -fr tensorrtllm_backend
+    """.format(backends[be], os.environ["REMOVE_ME_TRTLLM_USERNAME"], os.environ["REMOVE_ME_TRTLLM_TOKEN"])
+
+        df += """
+RUN bash /tmp/install_tensorrt.sh && rm /tmp/install_tensorrt.sh
+ENV TRT_ROOT=/usr/local/tensorrt
+
+# Remove TRT contents that are not needed in runtime
+RUN ARCH="$(uname -i)" && \
+    rm -fr ${TRT_ROOT}/bin ${TRT_ROOT}/targets/${ARCH}-linux-gnu/bin ${TRT_ROOT}/data && \
+    rm -fr  ${TRT_ROOT}/doc ${TRT_ROOT}/onnx_graphsurgeon ${TRT_ROOT}/python && \
+    rm -fr ${TRT_ROOT}/samples  ${TRT_ROOT}/targets/${ARCH}-linux-gnu/samples
+
+# Uninstall unused nvidia packages
+RUN if pip freeze | grep -q "nvidia.*"; then \
+        pip freeze | grep "nvidia.*" | xargs pip uninstall -y; \
+    fi
+RUN pip cache purge
+
+ENV LD_LIBRARY_PATH=/usr/local/tensorrt/lib/:/opt/tritonserver/backends/tensorrtllm:$LD_LIBRARY_PATH
+"""
 
     if "vllm" in backends:
         # [DLIS-5606] Build Conda environment for vLLM backend
@@ -1799,9 +1818,12 @@ def tensorrtllm_prebuild(cmake_script):
 
     # FIXME: Update the file structure to the one Triton expects. This is a temporary fix
     # to get the build working for r23.10.
-    # patch inflight_batcher_llm/CMakeLists.txt  < inflight_batcher_llm/CMakeLists.txt.patch
-    cmake_script.cmd("export ARCH=$(uname -m)")
-
+    cmake_script.cmd("cd tensorrtllm_backend")
+    cmake_script.cmd("patch inflight_batcher_llm/CMakeLists.txt  < inflight_batcher_llm/CMakeLists.txt.patch")
+    cmake_script.cmd("mv inflight_batcher_llm/src .")
+    cmake_script.cmd("mv inflight_batcher_llm/cmake .")
+    cmake_script.cmd("mv inflight_batcher_llm/CMakeLists.txt .")
+    cmake_script.cmd("cd ..")
 
 def backend_build(
     be,
@@ -1825,9 +1847,10 @@ def backend_build(
     cmake_script.cwd(build_dir)
     # FIXME: Use GitHub repo
     if be == "tensorrtllm":
-        cmake_script.gitclone(
-            backend_repo("tekit"), tag, be, "https://gitlab-master.nvidia.com/krish"
-        )
+        # cmake_script.gitclone(
+        #     backend_repo("tekit"), tag, be, "https://gitlab-master.nvidia.com/ftp"
+        # )
+        cmake_script.cmd("git clone --single-branch --depth=1 -b {} https://{}:{}@gitlab-master.nvidia.com/ftp/tekit_backend.git tensorrtllm_backend".format(tag, os.environ["REMOVE_ME_TRTLLM_USERNAME"], os.environ["REMOVE_ME_TRTLLM_TOKEN"]))
     else:
         cmake_script.gitclone(backend_repo(be), tag, be, github_organization)
 

From dafafe45d635963207a1a2fa849f48abcd4b76e1 Mon Sep 17 00:00:00 2001
From: krishung5 <krish@nvidia.com>
Date: Tue, 10 Oct 2023 15:00:45 -0700
Subject: [PATCH 06/12] Remove TRTLLM TRT and CUDA versions

---
 build.py | 23 +++++++++++++++++------
 1 file changed, 17 insertions(+), 6 deletions(-)

diff --git a/build.py b/build.py
index 3c0d03f611..801da6565c 100755
--- a/build.py
+++ b/build.py
@@ -78,8 +78,6 @@
         "2023.0.0",  # Standalone OpenVINO
         "2.4.7",  # DCGM version
         "py310_23.1.0-1",  # Conda version
-        "9.1.0.3",  # TRT version for building TRT-LLM backend
-        "12.2",  # CUDA version for building TRT-LLM backend
         "0.2.0",  # vLLM version
     )
 }
@@ -1331,7 +1329,11 @@ def dockerfile_prepare_container_linux(argmap, backends, enable_gpu, target_mach
 RUN cd tensorrtllm_backend && git submodule update --init --recursive
 RUN cp tensorrtllm_backend/tensorrt_llm/docker/common/install_tensorrt.sh /tmp/
 RUN rm -fr tensorrtllm_backend
-    """.format(backends[be], os.environ["REMOVE_ME_TRTLLM_USERNAME"], os.environ["REMOVE_ME_TRTLLM_TOKEN"])
+    """.format(
+            backends[be],
+            os.environ["REMOVE_ME_TRTLLM_USERNAME"],
+            os.environ["REMOVE_ME_TRTLLM_TOKEN"],
+        )
 
         df += """
 RUN bash /tmp/install_tensorrt.sh && rm /tmp/install_tensorrt.sh
@@ -1359,7 +1361,7 @@ def dockerfile_prepare_container_linux(argmap, backends, enable_gpu, target_mach
 # vLLM needed for vLLM backend
 RUN pip3 install vllm=={}
 """.format(
-            TRITON_VERSION_MAP[FLAGS.version][9]
+            TRITON_VERSION_MAP[FLAGS.version][7]
         )
 
     df += """
@@ -1819,12 +1821,15 @@ def tensorrtllm_prebuild(cmake_script):
     # FIXME: Update the file structure to the one Triton expects. This is a temporary fix
     # to get the build working for r23.10.
     cmake_script.cmd("cd tensorrtllm_backend")
-    cmake_script.cmd("patch inflight_batcher_llm/CMakeLists.txt  < inflight_batcher_llm/CMakeLists.txt.patch")
+    cmake_script.cmd(
+        "patch inflight_batcher_llm/CMakeLists.txt  < inflight_batcher_llm/CMakeLists.txt.patch"
+    )
     cmake_script.cmd("mv inflight_batcher_llm/src .")
     cmake_script.cmd("mv inflight_batcher_llm/cmake .")
     cmake_script.cmd("mv inflight_batcher_llm/CMakeLists.txt .")
     cmake_script.cmd("cd ..")
 
+
 def backend_build(
     be,
     cmake_script,
@@ -1850,7 +1855,13 @@ def backend_build(
         # cmake_script.gitclone(
         #     backend_repo("tekit"), tag, be, "https://gitlab-master.nvidia.com/ftp"
         # )
-        cmake_script.cmd("git clone --single-branch --depth=1 -b {} https://{}:{}@gitlab-master.nvidia.com/ftp/tekit_backend.git tensorrtllm_backend".format(tag, os.environ["REMOVE_ME_TRTLLM_USERNAME"], os.environ["REMOVE_ME_TRTLLM_TOKEN"]))
+        cmake_script.cmd(
+            "git clone --single-branch --depth=1 -b {} https://{}:{}@gitlab-master.nvidia.com/ftp/tekit_backend.git tensorrtllm_backend".format(
+                tag,
+                os.environ["REMOVE_ME_TRTLLM_USERNAME"],
+                os.environ["REMOVE_ME_TRTLLM_TOKEN"],
+            )
+        )
     else:
         cmake_script.gitclone(backend_repo(be), tag, be, github_organization)
 

From c51258085d1d7eeb32650de304e15941e9acfa92 Mon Sep 17 00:00:00 2001
From: krishung5 <krish@nvidia.com>
Date: Tue, 10 Oct 2023 15:06:21 -0700
Subject: [PATCH 07/12] Fix up unused var

---
 build.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/build.py b/build.py
index 801da6565c..b6635f93f4 100755
--- a/build.py
+++ b/build.py
@@ -1526,8 +1526,6 @@ def create_build_dockerfiles(
         if FLAGS.version is None or FLAGS.version not in TRITON_VERSION_MAP
         else TRITON_VERSION_MAP[FLAGS.version][6],
     }
-    dockerfileargmap["TRT_LLM_TRT_VERSION"] = TRITON_VERSION_MAP[FLAGS.version][7]
-    dockerfileargmap["TRT_LLM_CUDA_VERSION"] = TRITON_VERSION_MAP[FLAGS.version][8]
 
     # For CPU-only image we need to copy some cuda libraries and dependencies
     # since we are using PyTorch and TensorFlow containers that

From 16ee5243ed4b1c376873504b29fd9ea3f76cf9fa Mon Sep 17 00:00:00 2001
From: krishung5 <krish@nvidia.com>
Date: Tue, 10 Oct 2023 15:59:35 -0700
Subject: [PATCH 08/12] Fix up dir name

---
 build.py | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/build.py b/build.py
index b6635f93f4..92d8424e4e 100755
--- a/build.py
+++ b/build.py
@@ -1818,14 +1818,12 @@ def tensorrtllm_prebuild(cmake_script):
 
     # FIXME: Update the file structure to the one Triton expects. This is a temporary fix
     # to get the build working for r23.10.
-    cmake_script.cmd("cd tensorrtllm_backend")
     cmake_script.cmd(
-        "patch inflight_batcher_llm/CMakeLists.txt  < inflight_batcher_llm/CMakeLists.txt.patch"
+        "patch tensorrtllm/inflight_batcher_llm/CMakeLists.txt  < tensorrtllm/inflight_batcher_llm/CMakeLists.txt.patch"
     )
-    cmake_script.cmd("mv inflight_batcher_llm/src .")
-    cmake_script.cmd("mv inflight_batcher_llm/cmake .")
-    cmake_script.cmd("mv inflight_batcher_llm/CMakeLists.txt .")
-    cmake_script.cmd("cd ..")
+    cmake_script.cmd("mv tensorrtllm/inflight_batcher_llm/src tensorrtllm")
+    cmake_script.cmd("mv tensorrtllm/inflight_batcher_llm/cmake tensorrtllm")
+    cmake_script.cmd("mv tensorrtllm/inflight_batcher_llm/CMakeLists.txt tensorrtllm")
 
 
 def backend_build(
@@ -1854,7 +1852,7 @@ def backend_build(
         #     backend_repo("tekit"), tag, be, "https://gitlab-master.nvidia.com/ftp"
         # )
         cmake_script.cmd(
-            "git clone --single-branch --depth=1 -b {} https://{}:{}@gitlab-master.nvidia.com/ftp/tekit_backend.git tensorrtllm_backend".format(
+            "git clone --single-branch --depth=1 -b {} https://{}:{}@gitlab-master.nvidia.com/ftp/tekit_backend.git tensorrtllm".format(
                 tag,
                 os.environ["REMOVE_ME_TRTLLM_USERNAME"],
                 os.environ["REMOVE_ME_TRTLLM_TOKEN"],

From 8f8b15c81d63cf2e809a43ccea7c589bbfb4fc39 Mon Sep 17 00:00:00 2001
From: krishung5 <krish@nvidia.com>
Date: Tue, 10 Oct 2023 17:59:31 -0700
Subject: [PATCH 09/12] Fix cmake patch

---
 build.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/build.py b/build.py
index 92d8424e4e..54ebb0ca8a 100755
--- a/build.py
+++ b/build.py
@@ -1818,9 +1818,10 @@ def tensorrtllm_prebuild(cmake_script):
 
     # FIXME: Update the file structure to the one Triton expects. This is a temporary fix
     # to get the build working for r23.10.
-    cmake_script.cmd(
-        "patch tensorrtllm/inflight_batcher_llm/CMakeLists.txt  < tensorrtllm/inflight_batcher_llm/CMakeLists.txt.patch"
-    )
+    # Uncomment the patch once moving to the GitHub repo
+    # cmake_script.cmd(
+    #     "patch tensorrtllm/inflight_batcher_llm/CMakeLists.txt  < tensorrtllm/inflight_batcher_llm/CMakeLists.txt.patch"
+    # )
     cmake_script.cmd("mv tensorrtllm/inflight_batcher_llm/src tensorrtllm")
     cmake_script.cmd("mv tensorrtllm/inflight_batcher_llm/cmake tensorrtllm")
     cmake_script.cmd("mv tensorrtllm/inflight_batcher_llm/CMakeLists.txt tensorrtllm")

From 60fee56346fcc6a38d373aeb273bee4bcac89abc Mon Sep 17 00:00:00 2001
From: krishung5 <krish@nvidia.com>
Date: Tue, 10 Oct 2023 18:40:58 -0700
Subject: [PATCH 10/12] Remove previous TRT version

---
 build.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/build.py b/build.py
index 54ebb0ca8a..71091681f3 100755
--- a/build.py
+++ b/build.py
@@ -1323,6 +1323,10 @@ def dockerfile_prepare_container_linux(argmap, backends, enable_gpu, target_mach
         df += """
 WORKDIR /workspace
 
+# Remove previous TRT installation
+RUN apt-get remove --purge -y tensorrt* libnvinfer*
+RUN pip uninstall -y tensorrt
+
 # Install new version of TRT using the script from TRT-LLM
 RUN apt-get update && apt-get install -y --no-install-recommends python-is-python3
 RUN git clone --single-branch --depth=1 -b {} https://{}:{}@gitlab-master.nvidia.com/ftp/tekit_backend.git tensorrtllm_backend

From ed135b818b12aec83d150415d909744f909a5fdf Mon Sep 17 00:00:00 2001
From: krishung5 <krish@nvidia.com>
Date: Wed, 11 Oct 2023 14:06:08 -0700
Subject: [PATCH 11/12] Install required packages for example models

---
 build.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/build.py b/build.py
index 71091681f3..61bdddfd44 100755
--- a/build.py
+++ b/build.py
@@ -1355,6 +1355,14 @@ def dockerfile_prepare_container_linux(argmap, backends, enable_gpu, target_mach
     fi
 RUN pip cache purge
 
+# Install required packages for example models
+RUN python3 -m pip install --upgrade pip && \
+        pip3 install transformers && \
+        pip3 install torch && \
+        pip3 install tritonclient[all] && \
+        pip3 install pandas && \
+        pip3 install tabulate
+
 ENV LD_LIBRARY_PATH=/usr/local/tensorrt/lib/:/opt/tritonserver/backends/tensorrtllm:$LD_LIBRARY_PATH
 """
 

From 6e22744847bbc32464ab7f313985b6e78a5051b9 Mon Sep 17 00:00:00 2001
From: krishung5 <krish@nvidia.com>
Date: Wed, 11 Oct 2023 17:50:19 -0700
Subject: [PATCH 12/12] Remove packages that are only needed for testing

---
 build.py | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/build.py b/build.py
index 61bdddfd44..7d14a26cc4 100755
--- a/build.py
+++ b/build.py
@@ -1349,20 +1349,17 @@ def dockerfile_prepare_container_linux(argmap, backends, enable_gpu, target_mach
     rm -fr  ${TRT_ROOT}/doc ${TRT_ROOT}/onnx_graphsurgeon ${TRT_ROOT}/python && \
     rm -fr ${TRT_ROOT}/samples  ${TRT_ROOT}/targets/${ARCH}-linux-gnu/samples
 
+# Install required packages for TRT-LLM models
+RUN python3 -m pip install --upgrade pip && \
+        pip3 install transformers && \
+        pip3 install torch
+
 # Uninstall unused nvidia packages
 RUN if pip freeze | grep -q "nvidia.*"; then \
         pip freeze | grep "nvidia.*" | xargs pip uninstall -y; \
     fi
 RUN pip cache purge
 
-# Install required packages for example models
-RUN python3 -m pip install --upgrade pip && \
-        pip3 install transformers && \
-        pip3 install torch && \
-        pip3 install tritonclient[all] && \
-        pip3 install pandas && \
-        pip3 install tabulate
-
 ENV LD_LIBRARY_PATH=/usr/local/tensorrt/lib/:/opt/tritonserver/backends/tensorrtllm:$LD_LIBRARY_PATH
 """