From 91048f90605f527f732861a956aeb7d0acff5441 Mon Sep 17 00:00:00 2001 From: krishung5 Date: Mon, 9 Oct 2023 18:35:09 -0700 Subject: [PATCH 01/12] Update url --- build.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/build.py b/build.py index a811cd3123..92621f59bd 100755 --- a/build.py +++ b/build.py @@ -1319,7 +1319,7 @@ def dockerfile_prepare_container_linux(argmap, backends, enable_gpu, target_mach if "tensorrtllm" in backends: be = "tensorrtllm" # FIXME: Update the url - url = "https://gitlab-master.nvidia.com/krish/tensorrtllm_backend/-/raw/{}/tools/gen_trtllm_dockerfile.py".format( + url = "https://gitlab-master.nvidia.com/krish/tekit_backend/-/raw/{}/tools/gen_trtllm_dockerfile.py".format( backends[be] ) @@ -1821,7 +1821,7 @@ def backend_build( # FIXME: Use GitHub repo if be == "tensorrtllm": cmake_script.gitclone( - backend_repo(be), tag, be, "https://gitlab-master.nvidia.com/krish" + backend_repo("tekit"), tag, be, "https://gitlab-master.nvidia.com/krish" ) else: cmake_script.gitclone(backend_repo(be), tag, be, github_organization) From a12d8a194d14e594b1ce11ebbc05c61431785c1f Mon Sep 17 00:00:00 2001 From: krishung5 Date: Mon, 9 Oct 2023 18:40:54 -0700 Subject: [PATCH 02/12] Debugging --- build.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/build.py b/build.py index 92621f59bd..043c06c206 100755 --- a/build.py +++ b/build.py @@ -1319,9 +1319,11 @@ def dockerfile_prepare_container_linux(argmap, backends, enable_gpu, target_mach if "tensorrtllm" in backends: be = "tensorrtllm" # FIXME: Update the url - url = "https://gitlab-master.nvidia.com/krish/tekit_backend/-/raw/{}/tools/gen_trtllm_dockerfile.py".format( - backends[be] - ) + # url = "https://gitlab-master.nvidia.com/krish/tekit_backend/-/raw/{}/tools/gen_trtllm_dockerfile.py".format( + # backends[be] + # ) + print("trtllm tag:", backends[be]) + url = "https://gitlab-master.nvidia.com/krish/tekit_backend/-/raw/krish-triton-changes/tools/gen_trtllm_dockerfile.py" response = requests.get(url) spec = importlib.util.spec_from_loader( From a244bca338f677dc46b4bf9fbf5c801980b186d3 Mon Sep 17 00:00:00 2001 From: krishung5 Date: Mon, 9 Oct 2023 18:42:08 -0700 Subject: [PATCH 03/12] Debugging --- build.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.py b/build.py index 043c06c206..0d9f67fcdb 100755 --- a/build.py +++ b/build.py @@ -1323,7 +1323,7 @@ def dockerfile_prepare_container_linux(argmap, backends, enable_gpu, target_mach # backends[be] # ) print("trtllm tag:", backends[be]) - url = "https://gitlab-master.nvidia.com/krish/tekit_backend/-/raw/krish-triton-changes/tools/gen_trtllm_dockerfile.py" + url = "https://gitlab-master.nvidia.com/krish/tensorrtllm_backend/-/raw/main/tools/gen_trtllm_dockerfile.py" response = requests.get(url) spec = importlib.util.spec_from_loader( From 562518b875772228341a43346c652ab390e1d155 Mon Sep 17 00:00:00 2001 From: krishung5 Date: Mon, 9 Oct 2023 20:05:30 -0700 Subject: [PATCH 04/12] Update url --- build.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/build.py b/build.py index 0d9f67fcdb..de4b03c6be 100755 --- a/build.py +++ b/build.py @@ -1319,11 +1319,9 @@ def dockerfile_prepare_container_linux(argmap, backends, enable_gpu, target_mach if "tensorrtllm" in backends: be = "tensorrtllm" # FIXME: Update the url - # url = "https://gitlab-master.nvidia.com/krish/tekit_backend/-/raw/{}/tools/gen_trtllm_dockerfile.py".format( - # backends[be] - # ) - print("trtllm tag:", backends[be]) - url = "https://gitlab-master.nvidia.com/krish/tensorrtllm_backend/-/raw/main/tools/gen_trtllm_dockerfile.py" + url = "https://gitlab-master.nvidia.com/krish/tekit_backend/-/raw/{}/tools/gen_trtllm_dockerfile.py".format( + backends[be] + ) response = requests.get(url) spec = importlib.util.spec_from_loader( @@ -1799,6 +1797,11 @@ def tensorrtllm_prebuild(cmake_script): cmake_script.cmd("export TRT_ROOT=/usr/local/tensorrt") cmake_script.cmd("export ARCH=$(uname -m)") + # FIXME: Update the file structure to the one Triton expects. This is a temporary fix + # to get the build working for r23.10. + # patch inflight_batcher_llm/CMakeLists.txt < inflight_batcher_llm/CMakeLists.txt.patch + cmake_script.cmd("export ARCH=$(uname -m)") + def backend_build( be, From 09bb2deb15368a717e65c54e7fa6b60ff7f11a84 Mon Sep 17 00:00:00 2001 From: krishung5 Date: Tue, 10 Oct 2023 14:54:10 -0700 Subject: [PATCH 05/12] Fix build for TRT-LLM backend --- build.py | 87 +++++++++++++++++++++++++++++++++++--------------------- 1 file changed, 55 insertions(+), 32 deletions(-) diff --git a/build.py b/build.py index de4b03c6be..3c0d03f611 100755 --- a/build.py +++ b/build.py @@ -78,7 +78,7 @@ "2023.0.0", # Standalone OpenVINO "2.4.7", # DCGM version "py310_23.1.0-1", # Conda version - "9.1.0.1", # TRT version for building TRT-LLM backend + "9.1.0.3", # TRT version for building TRT-LLM backend "12.2", # CUDA version for building TRT-LLM backend "0.2.0", # vLLM version ) @@ -884,19 +884,8 @@ def tensorrtllm_cmake_args(images): None, images["base"], ), - cmake_backend_arg( - "tensorrtllm", - "TENSORRT_VERSION", - None, - TRITON_VERSION_MAP[FLAGS.version][7], - ), - cmake_backend_arg( - "tensorrtllm", - "CUDA_VERSION", - None, - TRITON_VERSION_MAP[FLAGS.version][8], - ), ] + cargs.append(cmake_backend_enable("tensorrtllm", "TRITON_BUILD", True)) return cargs @@ -1315,23 +1304,53 @@ def dockerfile_prepare_container_linux(argmap, backends, enable_gpu, target_mach pip3 install --upgrade numpy && \ rm -rf /var/lib/apt/lists/* """ + # FIXME: Use the postbuild script here # Add dependencies needed for tensorrtllm backend if "tensorrtllm" in backends: be = "tensorrtllm" - # FIXME: Update the url - url = "https://gitlab-master.nvidia.com/krish/tekit_backend/-/raw/{}/tools/gen_trtllm_dockerfile.py".format( - backends[be] - ) + # # FIXME: Update the url + # url = "https://gitlab-master.nvidia.com/ftp/tekit_backend/-/raw/{}/tools/gen_trtllm_dockerfile.py".format( + # backends[be] + # ) + + # response = requests.get(url) + # spec = importlib.util.spec_from_loader( + # "trtllm_buildscript", loader=None, origin=url + # ) + # trtllm_buildscript = importlib.util.module_from_spec(spec) + # exec(response.content, trtllm_buildscript.__dict__) + # df += trtllm_buildscript.create_postbuild( + # backends[be] # repo tag + # ) + df += """ +WORKDIR /workspace - response = requests.get(url) - spec = importlib.util.spec_from_loader( - "trtllm_buildscript", loader=None, origin=url - ) - trtllm_buildscript = importlib.util.module_from_spec(spec) - exec(response.content, trtllm_buildscript.__dict__) - df += trtllm_buildscript.create_postbuild( - argmap["TRT_LLM_TRT_VERSION"], argmap["TRT_LLM_CUDA_VERSION"] - ) +# Install new version of TRT using the script from TRT-LLM +RUN apt-get update && apt-get install -y --no-install-recommends python-is-python3 +RUN git clone --single-branch --depth=1 -b {} https://{}:{}@gitlab-master.nvidia.com/ftp/tekit_backend.git tensorrtllm_backend +RUN cd tensorrtllm_backend && git submodule update --init --recursive +RUN cp tensorrtllm_backend/tensorrt_llm/docker/common/install_tensorrt.sh /tmp/ +RUN rm -fr tensorrtllm_backend + """.format(backends[be], os.environ["REMOVE_ME_TRTLLM_USERNAME"], os.environ["REMOVE_ME_TRTLLM_TOKEN"]) + + df += """ +RUN bash /tmp/install_tensorrt.sh && rm /tmp/install_tensorrt.sh +ENV TRT_ROOT=/usr/local/tensorrt + +# Remove TRT contents that are not needed in runtime +RUN ARCH="$(uname -i)" && \ + rm -fr ${TRT_ROOT}/bin ${TRT_ROOT}/targets/${ARCH}-linux-gnu/bin ${TRT_ROOT}/data && \ + rm -fr ${TRT_ROOT}/doc ${TRT_ROOT}/onnx_graphsurgeon ${TRT_ROOT}/python && \ + rm -fr ${TRT_ROOT}/samples ${TRT_ROOT}/targets/${ARCH}-linux-gnu/samples + +# Uninstall unused nvidia packages +RUN if pip freeze | grep -q "nvidia.*"; then \ + pip freeze | grep "nvidia.*" | xargs pip uninstall -y; \ + fi +RUN pip cache purge + +ENV LD_LIBRARY_PATH=/usr/local/tensorrt/lib/:/opt/tritonserver/backends/tensorrtllm:$LD_LIBRARY_PATH +""" if "vllm" in backends: # [DLIS-5606] Build Conda environment for vLLM backend @@ -1799,9 +1818,12 @@ def tensorrtllm_prebuild(cmake_script): # FIXME: Update the file structure to the one Triton expects. This is a temporary fix # to get the build working for r23.10. - # patch inflight_batcher_llm/CMakeLists.txt < inflight_batcher_llm/CMakeLists.txt.patch - cmake_script.cmd("export ARCH=$(uname -m)") - + cmake_script.cmd("cd tensorrtllm_backend") + cmake_script.cmd("patch inflight_batcher_llm/CMakeLists.txt < inflight_batcher_llm/CMakeLists.txt.patch") + cmake_script.cmd("mv inflight_batcher_llm/src .") + cmake_script.cmd("mv inflight_batcher_llm/cmake .") + cmake_script.cmd("mv inflight_batcher_llm/CMakeLists.txt .") + cmake_script.cmd("cd ..") def backend_build( be, @@ -1825,9 +1847,10 @@ def backend_build( cmake_script.cwd(build_dir) # FIXME: Use GitHub repo if be == "tensorrtllm": - cmake_script.gitclone( - backend_repo("tekit"), tag, be, "https://gitlab-master.nvidia.com/krish" - ) + # cmake_script.gitclone( + # backend_repo("tekit"), tag, be, "https://gitlab-master.nvidia.com/ftp" + # ) + cmake_script.cmd("git clone --single-branch --depth=1 -b {} https://{}:{}@gitlab-master.nvidia.com/ftp/tekit_backend.git tensorrtllm_backend".format(tag, os.environ["REMOVE_ME_TRTLLM_USERNAME"], os.environ["REMOVE_ME_TRTLLM_TOKEN"])) else: cmake_script.gitclone(backend_repo(be), tag, be, github_organization) From dafafe45d635963207a1a2fa849f48abcd4b76e1 Mon Sep 17 00:00:00 2001 From: krishung5 Date: Tue, 10 Oct 2023 15:00:45 -0700 Subject: [PATCH 06/12] Remove TRTLLM TRT and CUDA versions --- build.py | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/build.py b/build.py index 3c0d03f611..801da6565c 100755 --- a/build.py +++ b/build.py @@ -78,8 +78,6 @@ "2023.0.0", # Standalone OpenVINO "2.4.7", # DCGM version "py310_23.1.0-1", # Conda version - "9.1.0.3", # TRT version for building TRT-LLM backend - "12.2", # CUDA version for building TRT-LLM backend "0.2.0", # vLLM version ) } @@ -1331,7 +1329,11 @@ def dockerfile_prepare_container_linux(argmap, backends, enable_gpu, target_mach RUN cd tensorrtllm_backend && git submodule update --init --recursive RUN cp tensorrtllm_backend/tensorrt_llm/docker/common/install_tensorrt.sh /tmp/ RUN rm -fr tensorrtllm_backend - """.format(backends[be], os.environ["REMOVE_ME_TRTLLM_USERNAME"], os.environ["REMOVE_ME_TRTLLM_TOKEN"]) + """.format( + backends[be], + os.environ["REMOVE_ME_TRTLLM_USERNAME"], + os.environ["REMOVE_ME_TRTLLM_TOKEN"], + ) df += """ RUN bash /tmp/install_tensorrt.sh && rm /tmp/install_tensorrt.sh @@ -1359,7 +1361,7 @@ def dockerfile_prepare_container_linux(argmap, backends, enable_gpu, target_mach # vLLM needed for vLLM backend RUN pip3 install vllm=={} """.format( - TRITON_VERSION_MAP[FLAGS.version][9] + TRITON_VERSION_MAP[FLAGS.version][7] ) df += """ @@ -1819,12 +1821,15 @@ def tensorrtllm_prebuild(cmake_script): # FIXME: Update the file structure to the one Triton expects. This is a temporary fix # to get the build working for r23.10. cmake_script.cmd("cd tensorrtllm_backend") - cmake_script.cmd("patch inflight_batcher_llm/CMakeLists.txt < inflight_batcher_llm/CMakeLists.txt.patch") + cmake_script.cmd( + "patch inflight_batcher_llm/CMakeLists.txt < inflight_batcher_llm/CMakeLists.txt.patch" + ) cmake_script.cmd("mv inflight_batcher_llm/src .") cmake_script.cmd("mv inflight_batcher_llm/cmake .") cmake_script.cmd("mv inflight_batcher_llm/CMakeLists.txt .") cmake_script.cmd("cd ..") + def backend_build( be, cmake_script, @@ -1850,7 +1855,13 @@ def backend_build( # cmake_script.gitclone( # backend_repo("tekit"), tag, be, "https://gitlab-master.nvidia.com/ftp" # ) - cmake_script.cmd("git clone --single-branch --depth=1 -b {} https://{}:{}@gitlab-master.nvidia.com/ftp/tekit_backend.git tensorrtllm_backend".format(tag, os.environ["REMOVE_ME_TRTLLM_USERNAME"], os.environ["REMOVE_ME_TRTLLM_TOKEN"])) + cmake_script.cmd( + "git clone --single-branch --depth=1 -b {} https://{}:{}@gitlab-master.nvidia.com/ftp/tekit_backend.git tensorrtllm_backend".format( + tag, + os.environ["REMOVE_ME_TRTLLM_USERNAME"], + os.environ["REMOVE_ME_TRTLLM_TOKEN"], + ) + ) else: cmake_script.gitclone(backend_repo(be), tag, be, github_organization) From c51258085d1d7eeb32650de304e15941e9acfa92 Mon Sep 17 00:00:00 2001 From: krishung5 Date: Tue, 10 Oct 2023 15:06:21 -0700 Subject: [PATCH 07/12] Fix up unused var --- build.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/build.py b/build.py index 801da6565c..b6635f93f4 100755 --- a/build.py +++ b/build.py @@ -1526,8 +1526,6 @@ def create_build_dockerfiles( if FLAGS.version is None or FLAGS.version not in TRITON_VERSION_MAP else TRITON_VERSION_MAP[FLAGS.version][6], } - dockerfileargmap["TRT_LLM_TRT_VERSION"] = TRITON_VERSION_MAP[FLAGS.version][7] - dockerfileargmap["TRT_LLM_CUDA_VERSION"] = TRITON_VERSION_MAP[FLAGS.version][8] # For CPU-only image we need to copy some cuda libraries and dependencies # since we are using PyTorch and TensorFlow containers that From 16ee5243ed4b1c376873504b29fd9ea3f76cf9fa Mon Sep 17 00:00:00 2001 From: krishung5 Date: Tue, 10 Oct 2023 15:59:35 -0700 Subject: [PATCH 08/12] Fix up dir name --- build.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/build.py b/build.py index b6635f93f4..92d8424e4e 100755 --- a/build.py +++ b/build.py @@ -1818,14 +1818,12 @@ def tensorrtllm_prebuild(cmake_script): # FIXME: Update the file structure to the one Triton expects. This is a temporary fix # to get the build working for r23.10. - cmake_script.cmd("cd tensorrtllm_backend") cmake_script.cmd( - "patch inflight_batcher_llm/CMakeLists.txt < inflight_batcher_llm/CMakeLists.txt.patch" + "patch tensorrtllm/inflight_batcher_llm/CMakeLists.txt < tensorrtllm/inflight_batcher_llm/CMakeLists.txt.patch" ) - cmake_script.cmd("mv inflight_batcher_llm/src .") - cmake_script.cmd("mv inflight_batcher_llm/cmake .") - cmake_script.cmd("mv inflight_batcher_llm/CMakeLists.txt .") - cmake_script.cmd("cd ..") + cmake_script.cmd("mv tensorrtllm/inflight_batcher_llm/src tensorrtllm") + cmake_script.cmd("mv tensorrtllm/inflight_batcher_llm/cmake tensorrtllm") + cmake_script.cmd("mv tensorrtllm/inflight_batcher_llm/CMakeLists.txt tensorrtllm") def backend_build( @@ -1854,7 +1852,7 @@ def backend_build( # backend_repo("tekit"), tag, be, "https://gitlab-master.nvidia.com/ftp" # ) cmake_script.cmd( - "git clone --single-branch --depth=1 -b {} https://{}:{}@gitlab-master.nvidia.com/ftp/tekit_backend.git tensorrtllm_backend".format( + "git clone --single-branch --depth=1 -b {} https://{}:{}@gitlab-master.nvidia.com/ftp/tekit_backend.git tensorrtllm".format( tag, os.environ["REMOVE_ME_TRTLLM_USERNAME"], os.environ["REMOVE_ME_TRTLLM_TOKEN"], From 8f8b15c81d63cf2e809a43ccea7c589bbfb4fc39 Mon Sep 17 00:00:00 2001 From: krishung5 Date: Tue, 10 Oct 2023 17:59:31 -0700 Subject: [PATCH 09/12] FIx cmake patch --- build.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/build.py b/build.py index 92d8424e4e..54ebb0ca8a 100755 --- a/build.py +++ b/build.py @@ -1818,9 +1818,10 @@ def tensorrtllm_prebuild(cmake_script): # FIXME: Update the file structure to the one Triton expects. This is a temporary fix # to get the build working for r23.10. - cmake_script.cmd( - "patch tensorrtllm/inflight_batcher_llm/CMakeLists.txt < tensorrtllm/inflight_batcher_llm/CMakeLists.txt.patch" - ) + # Uncomment the patch once moving to the GitHub repo + # cmake_script.cmd( + # "patch tensorrtllm/inflight_batcher_llm/CMakeLists.txt < tensorrtllm/inflight_batcher_llm/CMakeLists.txt.patch" + # ) cmake_script.cmd("mv tensorrtllm/inflight_batcher_llm/src tensorrtllm") cmake_script.cmd("mv tensorrtllm/inflight_batcher_llm/cmake tensorrtllm") cmake_script.cmd("mv tensorrtllm/inflight_batcher_llm/CMakeLists.txt tensorrtllm") From 60fee56346fcc6a38d373aeb273bee4bcac89abc Mon Sep 17 00:00:00 2001 From: krishung5 Date: Tue, 10 Oct 2023 18:40:58 -0700 Subject: [PATCH 10/12] Remove previous TRT version --- build.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/build.py b/build.py index 54ebb0ca8a..71091681f3 100755 --- a/build.py +++ b/build.py @@ -1323,6 +1323,10 @@ def dockerfile_prepare_container_linux(argmap, backends, enable_gpu, target_mach df += """ WORKDIR /workspace +# Remove previous TRT installation +RUN apt-get remove --purge -y tensorrt* libnvinfer* +RUN pip uninstall -y tensorrt + # Install new version of TRT using the script from TRT-LLM RUN apt-get update && apt-get install -y --no-install-recommends python-is-python3 RUN git clone --single-branch --depth=1 -b {} https://{}:{}@gitlab-master.nvidia.com/ftp/tekit_backend.git tensorrtllm_backend From ed135b818b12aec83d150415d909744f909a5fdf Mon Sep 17 00:00:00 2001 From: krishung5 Date: Wed, 11 Oct 2023 14:06:08 -0700 Subject: [PATCH 11/12] Install required packages for example models --- build.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/build.py b/build.py index 71091681f3..61bdddfd44 100755 --- a/build.py +++ b/build.py @@ -1355,6 +1355,14 @@ def dockerfile_prepare_container_linux(argmap, backends, enable_gpu, target_mach fi RUN pip cache purge +# Install required packages for example models +RUN python3 -m pip install --upgrade pip && \ + pip3 install transformers && \ + pip3 install torch && \ + pip3 install tritonclient[all] && \ + pip3 install pandas && \ + pip3 install tabulate + ENV LD_LIBRARY_PATH=/usr/local/tensorrt/lib/:/opt/tritonserver/backends/tensorrtllm:$LD_LIBRARY_PATH """ From 6e22744847bbc32464ab7f313985b6e78a5051b9 Mon Sep 17 00:00:00 2001 From: krishung5 Date: Wed, 11 Oct 2023 17:50:19 -0700 Subject: [PATCH 12/12] Remove packages that are only needed for testing --- build.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/build.py b/build.py index 61bdddfd44..7d14a26cc4 100755 --- a/build.py +++ b/build.py @@ -1349,20 +1349,17 @@ def dockerfile_prepare_container_linux(argmap, backends, enable_gpu, target_mach rm -fr ${TRT_ROOT}/doc ${TRT_ROOT}/onnx_graphsurgeon ${TRT_ROOT}/python && \ rm -fr ${TRT_ROOT}/samples ${TRT_ROOT}/targets/${ARCH}-linux-gnu/samples +# Install required packages for TRT-LLM models +RUN python3 -m pip install --upgrade pip && \ + pip3 install transformers && \ + pip3 install torch + # Uninstall unused nvidia packages RUN if pip freeze | grep -q "nvidia.*"; then \ pip freeze | grep "nvidia.*" | xargs pip uninstall -y; \ fi RUN pip cache purge -# Install required packages for example models -RUN python3 -m pip install --upgrade pip && \ - pip3 install transformers && \ - pip3 install torch && \ - pip3 install tritonclient[all] && \ - pip3 install pandas && \ - pip3 install tabulate - ENV LD_LIBRARY_PATH=/usr/local/tensorrt/lib/:/opt/tritonserver/backends/tensorrtllm:$LD_LIBRARY_PATH """