From fb2b2e547e277b677ed01275965812a7cb167da0 Mon Sep 17 00:00:00 2001 From: Misha Chornyi <99709299+mc-nv@users.noreply.github.com> Date: Mon, 22 Apr 2024 19:02:18 -0700 Subject: [PATCH] Mchornyi krish 24.04 (#7149) * Enable TensorRT-LLM build outside of CMake * TensorRT-LLM requires lower version of cuDNN --- build.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/build.py b/build.py index 6e218289487..ac50c9d1f4d 100755 --- a/build.py +++ b/build.py @@ -819,6 +819,8 @@ def fastertransformer_cmake_args(): def tensorrtllm_cmake_args(images): + cmake_script.cmd("apt-get update && apt-get install -y libcudnn8-dev && ldconfig") + cmake_script.cmd("python3 ../tensorrt_llm/scripts/build_wheel.py --trt_root /usr/local/tensorrt") cargs = [ cmake_backend_arg( "tensorrtllm", @@ -830,7 +832,6 @@ def tensorrtllm_cmake_args(images): "tensorrtllm", "TRT_INCLUDE_DIR", None, "${TRT_ROOT}/include" ), ] - cargs.append(cmake_backend_enable("tensorrtllm", "TRITON_BUILD", True)) cargs.append(cmake_backend_enable("tensorrtllm", "USE_CXX11_ABI", True)) return cargs @@ -1094,6 +1095,8 @@ def create_dockerfile_linux( if "tensorrtllm" in backends: df += """ # Remove TRT contents that are not needed in runtime +RUN apt-get update && apt-get install -y libcudnn8-dev && ldconfig + RUN ARCH="$(uname -i)" \\ && rm -fr ${TRT_ROOT}/bin ${TRT_ROOT}/targets/${ARCH}-linux-gnu/bin ${TRT_ROOT}/data \\ && rm -fr ${TRT_ROOT}/doc ${TRT_ROOT}/onnx_graphsurgeon ${TRT_ROOT}/python \\