From b0d461299bef727f3becc04768f54a6545e4d49a Mon Sep 17 00:00:00 2001 From: Kris Hung Date: Thu, 2 Nov 2023 14:28:48 -0700 Subject: [PATCH] Use post build function for TRT-LLM backend (#6476) * Use postbuild function * Remove updating submodule url --- build.py | 67 ++++++++------------------------------------------------ 1 file changed, 9 insertions(+), 58 deletions(-) diff --git a/build.py b/build.py index 5b717811f3..ed9ed535fd 100755 --- a/build.py +++ b/build.py @@ -1305,54 +1305,17 @@ def dockerfile_prepare_container_linux(argmap, backends, enable_gpu, target_mach # Add dependencies needed for tensorrtllm backend if "tensorrtllm" in backends: be = "tensorrtllm" - # url = "https://raw.githubusercontent.com/triton-inference-server/tensorrtllm_backend/{}/tools/gen_trtllm_dockerfile.py".format( - # backends[be] - # ) - - # response = requests.get(url) - # spec = importlib.util.spec_from_loader( - # "trtllm_buildscript", loader=None, origin=url - # ) - # trtllm_buildscript = importlib.util.module_from_spec(spec) - # exec(response.content, trtllm_buildscript.__dict__) - # df += trtllm_buildscript.create_postbuild(backends[be]) - - df += """ -WORKDIR /workspace -# Remove previous TRT installation -RUN apt-get remove --purge -y tensorrt* libnvinfer* -RUN pip uninstall -y tensorrt -# Install new version of TRT using the script from TRT-LLM -RUN apt-get update && apt-get install -y --no-install-recommends python-is-python3 -RUN git clone --single-branch --depth=1 -b {} https://github.com/triton-inference-server/tensorrtllm_backend.git tensorrtllm_backend -RUN cd tensorrtllm_backend && git submodule set-url -- tensorrt_llm https://github.com/NVIDIA/TensorRT-LLM.git -RUN cd tensorrtllm_backend && git submodule sync -RUN cd tensorrtllm_backend && git submodule update --init --recursive -RUN cp tensorrtllm_backend/tensorrt_llm/docker/common/install_tensorrt.sh /tmp/ -RUN rm -fr tensorrtllm_backend - """.format( + url = "https://raw.githubusercontent.com/triton-inference-server/tensorrtllm_backend/{}/tools/gen_trtllm_dockerfile.py".format( backends[be] ) - df += """ -RUN bash /tmp/install_tensorrt.sh && rm /tmp/install_tensorrt.sh -ENV TRT_ROOT=/usr/local/tensorrt -# Remove TRT contents that are not needed in runtime -RUN ARCH="$(uname -i)" && \ - rm -fr ${TRT_ROOT}/bin ${TRT_ROOT}/targets/${ARCH}-linux-gnu/bin ${TRT_ROOT}/data && \ - rm -fr ${TRT_ROOT}/doc ${TRT_ROOT}/onnx_graphsurgeon ${TRT_ROOT}/python && \ - rm -fr ${TRT_ROOT}/samples ${TRT_ROOT}/targets/${ARCH}-linux-gnu/samples -# Install required packages for TRT-LLM models -RUN python3 -m pip install --upgrade pip && \ - pip3 install transformers && \ - pip3 install torch -# Uninstall unused nvidia packages -RUN if pip freeze | grep -q "nvidia.*"; then \ - pip freeze | grep "nvidia.*" | xargs pip uninstall -y; \ - fi -RUN pip cache purge -ENV LD_LIBRARY_PATH=/usr/local/tensorrt/lib/:/opt/tritonserver/backends/tensorrtllm:$LD_LIBRARY_PATH -""" + response = requests.get(url) + spec = importlib.util.spec_from_loader( + "trtllm_buildscript", loader=None, origin=url + ) + trtllm_buildscript = importlib.util.module_from_spec(spec) + exec(response.content, trtllm_buildscript.__dict__) + df += trtllm_buildscript.create_postbuild(backends[be]) if "vllm" in backends: # [DLIS-5606] Build Conda environment for vLLM backend @@ -1843,22 +1806,10 @@ def backend_build( cmake_script.comment() cmake_script.mkdir(build_dir) cmake_script.cwd(build_dir) + cmake_script.gitclone(backend_repo(be), tag, be, github_organization) if be == "tensorrtllm": - cmake_script.cmd( - "git clone --single-branch --depth=1 -b {} https://github.com/triton-inference-server/tensorrtllm_backend tensorrtllm".format( - tag - ) - ) - cmake_script.cmd("cd tensorrtllm") - cmake_script.cmd( - "git submodule set-url -- tensorrt_llm https://github.com/NVIDIA/TensorRT-LLM.git" - ) - cmake_script.cmd("git submodule sync") - cmake_script.cmd("cd ..") tensorrtllm_prebuild(cmake_script) - else: - cmake_script.gitclone(backend_repo(be), tag, be, github_organization) cmake_script.mkdir(repo_build_dir) cmake_script.cwd(repo_build_dir)