diff --git a/build.py b/build.py
index 2a91e4b570..88190ba484 100755
--- a/build.py
+++ b/build.py
@@ -819,7 +819,21 @@ def fastertransformer_cmake_args():
 
 
 def tensorrtllm_cmake_args(images):
-    cargs = []
+    cmake_script.cmd("apt-get update && apt-get install -y libcudnn8-dev && ldconfig")
+    cmake_script.cmd(
+        "python3 ../tensorrt_llm/scripts/build_wheel.py --trt_root /usr/local/tensorrt -i -c -j 18"
+    )
+    cargs = [
+        cmake_backend_arg(
+            "tensorrtllm",
+            "TRT_LIB_DIR",
+            None,
+            "${TRT_ROOT}/targets/${ARCH}-linux-gnu/lib",
+        ),
+        cmake_backend_arg(
+            "tensorrtllm", "TRT_INCLUDE_DIR", None, "${TRT_ROOT}/include"
+        ),
+    ]
 
     cargs.append(cmake_backend_enable("tensorrtllm", "USE_CXX11_ABI", True))
     return cargs
@@ -1094,10 +1108,19 @@ def create_dockerfile_linux(
 RUN python3 -m pip install --upgrade pip \\
     && pip3 install transformers
 
+# Drop the static libs
+RUN ARCH="$(uname -i)" \\
+    && rm -f ${TRT_ROOT}/targets/${ARCH}-linux-gnu/lib/libnvinfer*.a \\
+        ${TRT_ROOT}/targets/${ARCH}-linux-gnu/lib/libnvonnxparser_*.a
+
 # Install TensorRT-LLM
+RUN python3 -m pip install /opt/tritonserver/backends/tensorrtllm/tensorrt_llm-*.whl -U --pre --extra-index-url https://pypi.nvidia.com \\
+    && rm -fv /opt/tritonserver/backends/tensorrtllm/tensorrt_llm-*.whl
 RUN find /usr -name libtensorrt_llm.so -exec dirname {} \; > /etc/ld.so.conf.d/tensorrt-llm.conf
 RUN find /opt/tritonserver -name libtritonserver.so -exec dirname {} \; > /etc/ld.so.conf.d/triton-tensorrtllm-worker.conf
 
+RUN pip3 install setuptools==69.5.1 grpcio-tools==1.64.0
+
 ENV LD_LIBRARY_PATH=/usr/local/tensorrt/lib/:/opt/tritonserver/backends/tensorrtllm:$LD_LIBRARY_PATH
 """
     with open(os.path.join(ddir, dockerfile_name), "w") as dfile:
@@ -1702,6 +1725,11 @@ def tensorrtllm_postbuild(cmake_script, repo_install_dir, tensorrtllm_be_dir):
     # TODO: Update the CMakeLists.txt of TRT-LLM backend to install the artifacts to the correct location
     cmake_destination_dir = os.path.join(repo_install_dir, "backends/tensorrtllm")
     cmake_script.mkdir(cmake_destination_dir)
+    # Copy over the TRT-LLM wheel for later installation
+    cmake_script.cp(
+        os.path.join(tensorrtllm_be_dir, "tensorrt_llm", "build", "tensorrt_llm-*.whl"),
+        cmake_destination_dir,
+    )
     # Copy over the TRT-LLM backend libraries
     cmake_script.cp(
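
Note on the first hunk: cmake_backend_arg and cmake_backend_enable are build.py's own helpers for composing -D flags for the backend's CMake invocation. The sketch below is a hypothetical reduction of how the new cargs entries expand, assuming a None type leaves the CMake cache-entry type implicit; the authoritative formatting is whatever build.py's real helpers do.

    # Hypothetical reduction of build.py's flag helpers (illustration only;
    # the real definitions live elsewhere in build.py).
    def cmake_backend_arg(backend, name, arg_type, value):
        # A None arg_type leaves the CMake cache-entry type implicit.
        type_suffix = "" if arg_type is None else ":{}".format(arg_type)
        return '"-D{}{}={}"'.format(name, type_suffix, value)

    def cmake_backend_enable(backend, name, value):
        return '"-D{}:BOOL={}"'.format(name, "ON" if value else "OFF")

    # Under those assumptions, the new cargs expand to roughly:
    #   "-DTRT_LIB_DIR=${TRT_ROOT}/targets/${ARCH}-linux-gnu/lib"
    #   "-DTRT_INCLUDE_DIR=${TRT_ROOT}/include"
    #   "-DUSE_CXX11_ABI:BOOL=ON"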
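
Note on the second hunk: the new RUN lines live inside a Python triple-quoted template, so each \\ in build.py's source is written out as a single backslash line continuation in the generated Dockerfile, and ${TRT_ROOT}/${ARCH} are expanded by the shell at image build time, not by Python. A toy check of that rendering (not build.py's actual writer):

    # Toy reduction: a template fragment like the one above, rendered to text.
    fragment = """\
    # Drop the static libs
    RUN ARCH="$(uname -i)" \\
        && rm -f ${TRT_ROOT}/targets/${ARCH}-linux-gnu/lib/libnvinfer*.a
    """
    # The emitted Dockerfile contains one literal backslash continuation,
    # and the ${...} references survive untouched for the build-time shell.
    assert "\\\n" in fragment and "${TRT_ROOT}" in fragment
    print(fragment)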
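
Note on the third hunk: the source path handed to cmake_script.cp contains a tensorrt_llm-*.whl glob, so the copy relies on the generated build script's shell expanding the wildcard. Below is a self-contained Python equivalent of that step, with a hypothetical helper name (copy_trtllm_wheel is not build.py's API), assuming exactly the directory layout used in the hunk:

    import glob
    import os
    import shutil

    def copy_trtllm_wheel(tensorrtllm_be_dir, dest_dir):
        # Mirror of the cp above: find the built wheel(s) and copy them so the
        # Dockerfile's "pip install .../tensorrt_llm-*.whl" step can find them.
        pattern = os.path.join(
            tensorrtllm_be_dir, "tensorrt_llm", "build", "tensorrt_llm-*.whl"
        )
        wheels = glob.glob(pattern)
        if not wheels:
            raise FileNotFoundError("no TRT-LLM wheel matched {}".format(pattern))
        for wheel in wheels:
            shutil.copy2(wheel, dest_dir)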