From b0d461299bef727f3becc04768f54a6545e4d49a Mon Sep 17 00:00:00 2001
From: Kris Hung <krish@nvidia.com>
Date: Thu, 2 Nov 2023 14:28:48 -0700
Subject: [PATCH] Use post build function for TRT-LLM backend (#6476)

* Use the create_postbuild function from the tensorrtllm_backend gen_trtllm_dockerfile.py script

* Remove the git submodule set-url step
---
 build.py | 67 ++++++++------------------------------------------------
 1 file changed, 9 insertions(+), 58 deletions(-)

diff --git a/build.py b/build.py
index 5b717811f3..ed9ed535fd 100755
--- a/build.py
+++ b/build.py
@@ -1305,54 +1305,17 @@ def dockerfile_prepare_container_linux(argmap, backends, enable_gpu, target_mach
     # Add dependencies needed for tensorrtllm backend
     if "tensorrtllm" in backends:
         be = "tensorrtllm"
-        # url = "https://raw.githubusercontent.com/triton-inference-server/tensorrtllm_backend/{}/tools/gen_trtllm_dockerfile.py".format(
-        #     backends[be]
-        # )
-
-        # response = requests.get(url)
-        # spec = importlib.util.spec_from_loader(
-        #     "trtllm_buildscript", loader=None, origin=url
-        # )
-        # trtllm_buildscript = importlib.util.module_from_spec(spec)
-        # exec(response.content, trtllm_buildscript.__dict__)
-        # df += trtllm_buildscript.create_postbuild(backends[be])
-
-        df += """
-WORKDIR /workspace
-# Remove previous TRT installation
-RUN apt-get remove --purge -y tensorrt* libnvinfer*
-RUN pip uninstall -y tensorrt
-# Install new version of TRT using the script from TRT-LLM
-RUN apt-get update && apt-get install -y --no-install-recommends python-is-python3
-RUN git clone --single-branch --depth=1 -b {} https://github.com/triton-inference-server/tensorrtllm_backend.git tensorrtllm_backend
-RUN cd tensorrtllm_backend && git submodule set-url -- tensorrt_llm https://github.com/NVIDIA/TensorRT-LLM.git
-RUN cd tensorrtllm_backend && git submodule sync
-RUN cd tensorrtllm_backend && git submodule update --init --recursive
-RUN cp tensorrtllm_backend/tensorrt_llm/docker/common/install_tensorrt.sh /tmp/
-RUN rm -fr tensorrtllm_backend
-    """.format(
+        url = "https://raw.githubusercontent.com/triton-inference-server/tensorrtllm_backend/{}/tools/gen_trtllm_dockerfile.py".format(
             backends[be]
         )
 
-        df += """
-RUN bash /tmp/install_tensorrt.sh && rm /tmp/install_tensorrt.sh
-ENV TRT_ROOT=/usr/local/tensorrt
-# Remove TRT contents that are not needed in runtime
-RUN ARCH="$(uname -i)" && \
-    rm -fr ${TRT_ROOT}/bin ${TRT_ROOT}/targets/${ARCH}-linux-gnu/bin ${TRT_ROOT}/data && \
-    rm -fr  ${TRT_ROOT}/doc ${TRT_ROOT}/onnx_graphsurgeon ${TRT_ROOT}/python && \
-    rm -fr ${TRT_ROOT}/samples  ${TRT_ROOT}/targets/${ARCH}-linux-gnu/samples
-# Install required packages for TRT-LLM models
-RUN python3 -m pip install --upgrade pip && \
-        pip3 install transformers && \
-        pip3 install torch
-# Uninstall unused nvidia packages
-RUN if pip freeze | grep -q "nvidia.*"; then \
-        pip freeze | grep "nvidia.*" | xargs pip uninstall -y; \
-    fi
-RUN pip cache purge
-ENV LD_LIBRARY_PATH=/usr/local/tensorrt/lib/:/opt/tritonserver/backends/tensorrtllm:$LD_LIBRARY_PATH
-"""
+        response = requests.get(url)
+        spec = importlib.util.spec_from_loader(
+            "trtllm_buildscript", loader=None, origin=url
+        )
+        trtllm_buildscript = importlib.util.module_from_spec(spec)
+        exec(response.content, trtllm_buildscript.__dict__)
+        df += trtllm_buildscript.create_postbuild(backends[be])
 
     if "vllm" in backends:
         # [DLIS-5606] Build Conda environment for vLLM backend
@@ -1843,22 +1806,10 @@ def backend_build(
     cmake_script.comment()
     cmake_script.mkdir(build_dir)
     cmake_script.cwd(build_dir)
+    cmake_script.gitclone(backend_repo(be), tag, be, github_organization)
 
     if be == "tensorrtllm":
-        cmake_script.cmd(
-            "git clone --single-branch --depth=1 -b {} https://github.com/triton-inference-server/tensorrtllm_backend tensorrtllm".format(
-                tag
-            )
-        )
-        cmake_script.cmd("cd tensorrtllm")
-        cmake_script.cmd(
-            "git submodule set-url -- tensorrt_llm https://github.com/NVIDIA/TensorRT-LLM.git"
-        )
-        cmake_script.cmd("git submodule sync")
-        cmake_script.cmd("cd ..")
         tensorrtllm_prebuild(cmake_script)
-    else:
-        cmake_script.gitclone(backend_repo(be), tag, be, github_organization)
 
     cmake_script.mkdir(repo_build_dir)
     cmake_script.cwd(repo_build_dir)